Mande valency

Last update: 2024-01-12

Для всех пар языков берётся одно наблюдение на каждый стимул. Здесь одно и то же расстояние используется и для тепловой карты, и для кластеризации:

Code
# Number of stimuli attested in both languages of every language pair.
# Rows are restricted to the `to_keep` stimuli that have a real equivalent
# and a coded construction type.
df_pairwise_total <- df |>
  filter(!is.na(construction_type),
         str_detect(construction, "no equivalent", negate = TRUE),
         number %in% to_keep) |>
  arrange(language) |>
  distinct(language, number, construction_type) |>
  pairwise_count(language, number) |>
  rename(total = n)

# Per stimulus, find the language pairs that share a construction type;
# each such stimulus counts once (n is reset to 1) and is then summed per pair.
df_pairwise_within_construction <- df |>
  filter(!is.na(construction_type),
         str_detect(construction, "no equivalent", negate = TRUE),
         number %in% to_keep) |>
  group_by(number) |>
  arrange(language) |>
  pairwise_count(language, construction_type) |>
  mutate(n = 1) |>
  group_by(item1, item2) |>
  summarise(n = sum(n), .groups = "drop")

# Heatmap of the share of stimuli on which two languages agree.
# NOTE(review): the title says "all stimuli" although the data above are
# restricted to `to_keep` — confirm which one is intended.
df_pairwise_total |>
  left_join(df_pairwise_within_construction) |>
  mutate(percentage = round(n/total*100, 2),
         across(c(item1, item2),
                \(lang) case_match(lang,
                                   "Dan_Gweetaa" ~ "Dan Gweetaa",
                                   .default = lang))) |>
  select(item1, item2, percentage) |>
  # fixed display order of the languages on both axes
  mutate(across(c(item1, item2),
                \(lang) factor(lang,
                               levels = c("Guro", "Dan Gweetaa", "Mano", "Kpelle", "Kono", "Looma", "Bamana")))) |>
  ggplot(aes(item1, item2)) +
  geom_tile(aes(fill = percentage), colour = "white") +
  geom_text(aes(label = str_c(percentage, "%")), colour = "white") +
  scale_fill_gradient(low = "lightblue", high = "navy") +
  labs(x = "", y = "", title = "Heatmap with all stimuli") +
  coord_fixed() +
  theme(legend.position = "bottom")

Code
# Distance between two languages = 100 minus the percentage of shared
# constructions. NOTE(review): as.dist() relies on the pivoted columns
# (first-appearance order of item2) lining up with the rows sorted by
# item1 — confirm the two orders always coincide.
dist_gold_standard <- df_pairwise_total |>
  left_join(df_pairwise_within_construction) |>
  mutate(percentage = 100-round(n/total*100, 2),
         across(c(item1, item2),
                \(lang) case_match(lang,
                                   "Dan_Gweetaa" ~ "Dan Gweetaa",
                                   .default = lang))) |>
  select(item1, item2, percentage) |>
  pivot_wider(names_from = item2, values_from = percentage) |>
  arrange(item1) |>
  select(-item1) |>
  as.dist()

library("ape")

# Hierarchical clustering drawn as a downward tree; tips are coloured by
# looking each label up in the `colors` table.
dist_gold_standard |>
  hclust() |>
  as.phylo() |>
  (\(tree) plot(tree,
                main = "Clusterization with all stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# NeighborNet network built from the same distance matrix as the tree above.
dist_gold_standard |> 
  neighborNet() |> 
  plot()
# This figure is a NeighborNet, not a heatmap, so the title says so.
title(main = "NeighborNet for all stimuli")

Все примеры, без фильтрации

Code
# Turn every language column of the wide table into a 0/1 presence flag
# (NA -> 0, anything else -> 1), put languages in rows and cluster them
# on the binary distance.
df_wide |>
  mutate(across(Guro:Bamana, \(cell) ifelse(is.na(cell), 0, 1))) |>
  select(Guro:Bamana) |>
  t() |>
  dist(method = "binary") |>
  hclust() |>
  as.phylo() |>
  (\(tree) plot(tree,
                main = "Все примеры, без фильтрации",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Все примеры, без фильтрации, одно наблюдение на стимул

Code
# Number of stimuli attested in both languages of every pair, over all
# stimuli (no `to_keep` restriction here).
df_pairwise_total <- df |>
  filter(!is.na(construction_type),
         str_detect(construction, "no equivalent", negate = TRUE)) |>
  arrange(language) |>
  distinct(language, number, construction_type) |>
  pairwise_count(language, number) |>
  rename(total = n)

Для всех пар языков одно наблюдение на каждый стимул. Стимулов в каждой паре языков разное количество:

Code
# Show the pairwise totals as a language-by-language table.
knitr::kable(
  df_pairwise_total |>
    pivot_wider(names_from = item2, values_from = total) |>
    select(item1, Dan_Gweetaa, Guro, Kono, Kpelle, Looma, Mano, Bamana)
)
item1 Dan_Gweetaa Guro Kono Kpelle Looma Mano Bamana
Dan_Gweetaa NA 94 101 105 95 107 108
Guro 94 NA 99 98 96 102 105
Kono 101 99 NA 110 101 113 113
Kpelle 105 98 110 NA 100 115 114
Looma 95 96 101 100 NA 104 106
Mano 107 102 113 115 104 NA 118
Bamana 108 105 113 114 106 118 NA
Code
# Per stimulus, find the language pairs that share a construction type;
# each such stimulus counts once, then the counts are summed per pair.
df_pairwise_within_construction <- df |>
  filter(!is.na(construction_type),
         str_detect(construction, "no equivalent", negate = TRUE)) |>
  group_by(number) |>
  arrange(language) |>
  pairwise_count(language, construction_type) |>
  mutate(n = 1) |>
  group_by(item1, item2) |>
  summarise(n = sum(n), .groups = "drop")

# Distance = 100 minus the percentage of shared constructions, then
# hierarchical clustering. NOTE(review): as.dist() assumes the pivoted
# columns are in the same order as the rows sorted by item1 — confirm.
df_pairwise_total |>
  left_join(df_pairwise_within_construction) |>
  mutate(percentage = 100-round(n/total*100, 2),
         across(c(item1, item2),
                \(lang) case_match(lang,
                                   "Dan_Gweetaa" ~ "Dan Gweetaa",
                                   .default = lang))) |>
  select(item1, item2, percentage) |>
  pivot_wider(names_from = item2, values_from = percentage) |>
  arrange(item1) |>
  select(-item1) |>
  as.dist() |>
  hclust() |>
  as.phylo() |>
  (\(tree) plot(tree,
                main = "Clusterization with all stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Только стимулы, для которых есть все языки, одно наблюдение на стимул

Code
# Number of stimuli attested in both languages of every pair, restricted
# to the stimuli listed in `to_keep`.
df_pairwise_total <- df |>
  filter(!is.na(construction_type),
         str_detect(construction, "no equivalent", negate = TRUE),
         number %in% to_keep) |>
  arrange(language) |>
  distinct(language, number, construction_type) |>
  pairwise_count(language, number) |>
  rename(total = n)

Всего 83 стимула в каждом языке:

Code
# Per `to_keep` stimulus, find the language pairs that share a construction
# type; each such stimulus counts once, then the counts are summed per pair.
df_pairwise_within_construction <- df |>
  filter(!is.na(construction_type),
         str_detect(construction, "no equivalent", negate = TRUE),
         number %in% to_keep) |>
  group_by(number) |>
  arrange(language) |>
  pairwise_count(language, construction_type) |>
  mutate(n = 1) |>
  group_by(item1, item2) |>
  summarise(n = sum(n), .groups = "drop")

# Distance = 100 minus the percentage of shared constructions, then
# hierarchical clustering. NOTE(review): as.dist() assumes the pivoted
# columns are in the same order as the rows sorted by item1 — confirm.
df_pairwise_total |>
  left_join(df_pairwise_within_construction) |>
  mutate(percentage = 100-round(n/total*100, 2),
         across(c(item1, item2),
                \(lang) case_match(lang,
                                   "Dan_Gweetaa" ~ "Dan Gweetaa",
                                   .default = lang))) |>
  select(item1, item2, percentage) |>
  pivot_wider(names_from = item2, values_from = percentage) |>
  arrange(item1) |>
  select(-item1) |>
  as.dist() |>
  hclust() |>
  as.phylo() |>
  (\(tree) plot(tree,
                main = "Clusterization with common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Только стимулы, для которых есть все языки, случайная конструкция для каждого языка на стимул

Code
# One random draw: keep a single randomly chosen row per (stimulus, language)
# among the `to_keep` stimuli, encode presence of each (stimulus, construction
# type) per language as 0/1, and cluster the languages.
# No seed is set in this chunk, so the tree may differ between runs.
# Unlike the sibling draws, this one asks hclust() for average linkage.
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type),
         number %in% to_keep) |>
  group_by(number, language) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(number, language, construction_type) |> 
  mutate(construction_type2 = 1)  |> 
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |> 
  select(Bamana:Mano)  |> 
  t() |> 
  dist(method = "binary")  |> 
  hclust(method = "average") |> 
  as.phylo()  %>%
  plot(main = "clusterization with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# One random draw: keep a single randomly chosen row per (stimulus, language)
# among the `to_keep` stimuli, encode presence of each (stimulus, construction
# type) per language as 0/1, and cluster the languages.
# No seed is set in this chunk, so the tree may differ between runs.
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type),
         number %in% to_keep) |>
  group_by(number, language) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(number, language, construction_type) |> 
  mutate(construction_type2 = 1)  |> 
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |> 
  select(Bamana:Mano)  |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "clusterization with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# One random draw: keep a single randomly chosen row per (stimulus, language)
# among the `to_keep` stimuli, encode presence of each (stimulus, construction
# type) per language as 0/1, and cluster the languages.
# No seed is set in this chunk, so the tree may differ between runs.
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type),
         number %in% to_keep) |>
  group_by(number, language) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(number, language, construction_type) |> 
  mutate(construction_type2 = 1)  |> 
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |> 
  select(Bamana:Mano)  |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "clusterization with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# One random draw: keep a single randomly chosen row per (stimulus, language)
# among the `to_keep` stimuli, encode presence of each (stimulus, construction
# type) per language as 0/1, and cluster the languages.
# No seed is set in this chunk, so the tree may differ between runs.
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type),
         number %in% to_keep) |>
  group_by(number, language) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(number, language, construction_type) |> 
  mutate(construction_type2 = 1)  |> 
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |> 
  select(Bamana:Mano)  |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "clusterization with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# One random draw: keep a single randomly chosen row per (stimulus, language)
# among the `to_keep` stimuli, encode presence of each (stimulus, construction
# type) per language as 0/1, and cluster the languages.
# No seed is set in this chunk, so the tree may differ between runs.
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type),
         number %in% to_keep) |>
  group_by(number, language) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(number, language, construction_type) |> 
  mutate(construction_type2 = 1)  |> 
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |> 
  select(Bamana:Mano)  |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "clusterization with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# One random draw: keep a single randomly chosen row per (stimulus, language)
# among the `to_keep` stimuli, encode presence of each (stimulus, construction
# type) per language as 0/1, and cluster the languages.
# No seed is set in this chunk, so the tree may differ between runs.
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type),
         number %in% to_keep) |>
  group_by(number, language) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(number, language, construction_type) |> 
  mutate(construction_type2 = 1)  |> 
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |> 
  select(Bamana:Mano)  |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "clusterization with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# One random draw: keep a single randomly chosen row per (stimulus, language)
# among the `to_keep` stimuli, encode presence of each (stimulus, construction
# type) per language as 0/1, and cluster the languages.
# No seed is set in this chunk, so the tree may differ between runs.
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type),
         number %in% to_keep) |>
  group_by(number, language) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(number, language, construction_type) |> 
  mutate(construction_type2 = 1)  |> 
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |> 
  select(Bamana:Mano)  |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "clusterization with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# One random draw: keep a single randomly chosen row per (stimulus, language)
# among the `to_keep` stimuli, encode presence of each (stimulus, construction
# type) per language as 0/1, and cluster the languages.
# No seed is set in this chunk, so the tree may differ between runs.
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type),
         number %in% to_keep) |>
  group_by(number, language) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(number, language, construction_type) |> 
  mutate(construction_type2 = 1)  |> 
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |> 
  select(Bamana:Mano)  |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "clusterization with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# One random draw: keep a single randomly chosen row per (stimulus, language)
# among the `to_keep` stimuli, encode presence of each (stimulus, construction
# type) per language as 0/1, and cluster the languages.
# No seed is set in this chunk, so the tree may differ between runs.
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type),
         number %in% to_keep) |>
  group_by(number, language) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(number, language, construction_type) |> 
  mutate(construction_type2 = 1)  |> 
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |> 
  select(Bamana:Mano)  |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "clusterization with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# One random draw: keep a single randomly chosen row per (stimulus, language)
# among the `to_keep` stimuli, encode presence of each (stimulus, construction
# type) per language as 0/1, and cluster the languages.
# No seed is set in this chunk, so the tree may differ between runs.
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type),
         number %in% to_keep) |>
  group_by(number, language) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(number, language, construction_type) |> 
  mutate(construction_type2 = 1)  |> 
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |> 
  select(Bamana:Mano)  |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "clusterization with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# One random draw: keep a single randomly chosen row per (stimulus, language)
# among the `to_keep` stimuli, encode presence of each (stimulus, construction
# type) per language as 0/1, and cluster the languages.
# No seed is set in this chunk, so the tree may differ between runs.
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type),
         number %in% to_keep) |>
  group_by(number, language) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(number, language, construction_type) |> 
  mutate(construction_type2 = 1)  |> 
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |> 
  select(Bamana:Mano)  |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "clusterization with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# One random draw: keep a single randomly chosen row per (stimulus, language)
# among the `to_keep` stimuli, encode presence of each (stimulus, construction
# type) per language as 0/1, and cluster the languages.
# No seed is set in this chunk, so the tree may differ between runs.
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type),
         number %in% to_keep) |>
  group_by(number, language) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(number, language, construction_type) |> 
  mutate(construction_type2 = 1)  |> 
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |> 
  select(Bamana:Mano)  |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "clusterization with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# One random draw: keep a single randomly chosen row per (stimulus, language)
# among the `to_keep` stimuli, encode presence of each (stimulus, construction
# type) per language as 0/1, and cluster the languages.
# No seed is set in this chunk, so the tree may differ between runs.
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type),
         number %in% to_keep) |>
  group_by(number, language) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(number, language, construction_type) |> 
  mutate(construction_type2 = 1)  |> 
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |> 
  select(Bamana:Mano)  |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "clusterization with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# One random draw: keep a single randomly chosen row per (stimulus, language)
# among the `to_keep` stimuli, encode presence of each (stimulus, construction
# type) per language as 0/1, and cluster the languages.
# No seed is set in this chunk, so the tree may differ between runs.
df |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type),
         number %in% to_keep) |>
  group_by(number, language) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(number, language, construction_type) |> 
  mutate(construction_type2 = 1)  |> 
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |> 
  select(Bamana:Mano)  |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo()  %>%
  plot(main = "clusterization with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Я вынул из таблицы данные по дан, гуро, коно, кпелле, лоома, мано и сделал столбцы

  • number
  • construction
  • extended_construction
  • postposition
  • language

Видимо, все эти примеры надо поправить. Сколько у нас наблюдений по каждому языку?

Code
# Usable observations per language: rows with a real equivalent and a
# coded construction type.
df |>
  filter(!is.na(construction_type),
         str_detect(construction, "no equivalent", negate = TRUE)) |>
  count(language)
language n
Bamana 157
Dan_Gweetaa 134
Guro 120
Kono 115
Kpelle 145
Looma 115
Mano 212

А пока давайте посмотрим на общую таблицу:

Code
# Load the merged wide table from the parent directory.
df_wide <- readxl::read_xlsx(path = "../GM_merged_wide.xlsx")

Можно посчитать, сколько раз пары языков заполняют один и тот же стимул:

Code
# For every language pair, count the stimuli that both languages fill in.
df_pairwise_total <- df |>
  filter(!is.na(construction_type),
         str_detect(construction, "no equivalent", negate = TRUE)) |>
  arrange(language) |>
  distinct(language, number) |>
  pairwise_count(language, number) |>
  rename(total = n)

# Print the counts as a language-by-language table.
arrange(
  pivot_wider(df_pairwise_total, names_from = item2, values_from = total),
  item1
)
item1 Bamana Dan_Gweetaa Guro Kono Kpelle Looma Mano
Bamana NA 108 105 113 114 106 118
Dan_Gweetaa 108 NA 94 101 105 95 107
Guro 105 94 NA 99 98 96 102
Kono 113 101 99 NA 110 101 113
Kpelle 114 105 98 110 NA 100 115
Looma 106 95 96 101 100 NA 104
Mano 118 107 102 113 115 104 NA

Можно посчитать, сколько раз пары языков заполняют одну и ту же конструкцию (т. е. совпадают в конструкции, в одном и том же стимуле):

Code
# Per stimulus, find the language pairs that share a construction type.
# The per-stimulus count is reset to 1 so that a stimulus contributes at
# most once to a pair, then the counts are summed per pair.
df_pairwise_within_construction <- df |>
  filter(!is.na(construction_type),
         str_detect(construction, "no equivalent", negate = TRUE)) |>
  group_by(number) |>
  arrange(language) |>
  pairwise_count(language, construction_type) |>
  mutate(n = 1) |>
  group_by(item1, item2) |>
  summarise(n = sum(n), .groups = "drop")

# Print the counts as a language-by-language table.
df_pairwise_within_construction |>
  pivot_wider(names_from = item2, values_from = n) |>
  arrange(item1) |>
  select(item1, Dan_Gweetaa, Guro, Kono, Kpelle, Looma, Mano, Bamana)
item1 Dan_Gweetaa Guro Kono Kpelle Looma Mano Bamana
Bamana 50 54 47 52 54 54 NA
Dan_Gweetaa NA 61 51 56 56 64 50
Guro 61 NA 52 60 57 65 54
Kono 51 52 NA 89 58 85 47
Kpelle 56 60 89 NA 65 95 52
Looma 56 57 58 65 NA 67 54
Mano 64 65 85 95 67 NA 54

Мне кажется, что чтобы посчитать процент совпадений, о котором просила Маша, нужно разделить последнюю таблицу на предпоследнюю:

Code
# Percentage of shared stimuli on which two languages use the same
# construction: matches divided by the pairwise total.
df_pairwise_total |>
  left_join(df_pairwise_within_construction) |>
  transmute(item1, item2, percentage = round(n/total*100, 3)) |>
  pivot_wider(names_from = item2, values_from = percentage) |>
  arrange(item1) |>
  select(item1, Dan_Gweetaa, Guro, Kono, Kpelle, Looma, Mano, Bamana)
item1 Dan_Gweetaa Guro Kono Kpelle Looma Mano Bamana
Bamana 46.296 51.429 41.593 45.614 50.943 45.763 NA
Dan_Gweetaa NA 64.894 50.495 53.333 58.947 59.813 46.296
Guro 64.894 NA 52.525 61.224 59.375 63.725 51.429
Kono 50.495 52.525 NA 80.909 57.426 75.221 41.593
Kpelle 53.333 61.224 80.909 NA 65.000 82.609 45.614
Looma 58.947 59.375 57.426 65.000 NA 64.423 50.943
Mano 59.813 63.725 75.221 82.609 64.423 NA 45.763

Можно построить тепловую карту:

Code
# Heatmap of the pairwise agreement percentages; "Dan_Gweetaa" is shown
# with a space instead of the underscore.
df_pairwise_total |>
  left_join(df_pairwise_within_construction) |>
  mutate(percentage = round(n/total*100, 2),
         across(c(item1, item2),
                \(lang) case_match(lang,
                                   "Dan_Gweetaa" ~ "Dan Gweetaa",
                                   .default = lang))) |>
  select(item1, item2, percentage) |>
  ggplot(aes(item1, item2)) +
  geom_tile(aes(fill = percentage), colour = "white") +
  geom_text(aes(label = str_c(percentage, "%")), colour = "white") +
  scale_fill_gradient(low = "lightblue", high = "navy") +
  labs(x = "", y = "") +
  coord_fixed() +
  theme(legend.position = "bottom")

Теперь можем провести быструю кластеризацию:

Code
# Binary distance between languages over the whole wide table:
# a cell counts as present (1) when it is not NA.
all_stimuli <- df_wide |>
  mutate(across(Guro:Bamana, \(cell) ifelse(is.na(cell), 0, 1))) |>
  select(Guro:Bamana) |>
  t() |>
  dist(method = "binary")

# Dendrogram with tips coloured via the `colors` lookup table.
all_stimuli |>
  hclust() |>
  as.phylo() |>
  (\(tree) plot(tree,
                main = "Все примеры, без фильтрации",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# NeighborNet drawn from the same distance matrix.
plot(neighborNet(all_stimuli))
title(main = "Все примеры, без фильтрации")

Можно ещё посмотреть на кластеризацию конструкций, но это не поместится на экран. Однако я считаю, что это было бы интересно Серёже.

Вот что будет, если взять только первые строки для каждого языка:

Code
# Keep only the first listed row of every (stimulus, language) pair, encode
# presence of each (stimulus, construction type) per language as 0/1 and
# compute the binary distance between languages.
sample_first_construction <- df |>
  filter(!is.na(construction_type),
         str_detect(construction, "no equivalent", negate = TRUE)) |>
  group_by(number, language) |>
  slice_head(n = 1) |>
  ungroup() |>
  select(number, language, construction_type) |>
  mutate(construction_type2 = 1) |>
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |>
  select(Bamana:Mano) |>
  t() |>
  dist(method = "binary")

# Dendrogram with tips coloured via the `colors` lookup table.
sample_first_construction |>
  hclust() |>
  as.phylo() |>
  (\(tree) plot(tree,
                main = "Выборка с первыми строками для каждого языка",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# NeighborNet for the first-row sample.
plot(neighborNet(sample_first_construction))
title(main = "Выборка с первыми строками для каждого языка")

МХ: А что будет, если выбрать случайный эквивалент, а не первую строку?

Сегодня (07-10-2023) я вижу следующее:

  • Бамана всегда дальше всех
  • Группа [Mano [Kono Kpelle]] всегда вместе
  • Группа [Dan Gweetaa Guro] всегда вместе
  • Единственная вариация происходит с Looma: он встречается в следующих комбинациях:
    • [Dan Gweetaa [Guro Looma]]
    • [[Dan Gweetaa Guro] Looma]
    • [Looma [Mano [Kono Kpelle]]]
Code
# Fix the RNG so the random choice of equivalents is reproducible.
set.seed(42)

# Keep one randomly chosen row per (stimulus, language), encode presence of
# each (stimulus, construction type) per language as 0/1 and compute the
# binary distance between languages.
sample_random_construction <- df |>
  filter(!is.na(construction_type),
         str_detect(construction, "no equivalent", negate = TRUE)) |>
  group_by(number, language) |>
  slice_sample(n = 1) |>
  ungroup() |>
  select(number, language, construction_type) |>
  mutate(construction_type2 = 1) |>
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |>
  select(Bamana:Mano) |>
  t() |>
  dist(method = "binary")

# Dendrogram with tips coloured via the `colors` lookup table.
sample_random_construction |>
  hclust() |>
  as.phylo() |>
  (\(tree) plot(tree,
                main = "Clusterization with random equivalents",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# NeighborNet for the random-equivalent sample.
plot(neighborNet(sample_random_construction))
title(main = "Выборка со случайным эквивалентом для каждого языка")

Вот что будет, если взять только популярные конструкции:

Code
# For every stimulus keep only the rows whose `n` equals the maximum `n`
# of that stimulus, then compute the binary distance between languages
# (a cell counts as present when it is not NA).
# NOTE(review): `n` is presumably the number of languages attesting the
# construction — confirm against how df_wide was built.
df_wide |> 
  group_by(number) |> 
  filter(n == max(n))  |> 
  ungroup() |> 
  mutate(across(Guro:Bamana, function(x){ifelse(is.na(x), 0, 1)})) |> 
  select(Guro:Bamana) |> 
  t() |> 
  dist(method = "binary")  ->
  sample_popular_construction

# Dendrogram with tips coloured via the `colors` lookup table.
# The title previously misspelled "популярными".
sample_popular_construction |> 
  hclust() |> 
  as.phylo() %>%
  plot(main = "Выборка с популярными конструкциями",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# NeighborNet for the popular-construction sample.
sample_popular_construction  |> 
  neighborNet() |> 
  plot()
# The title previously misspelled "популярными".
title(main = "Выборка с популярными конструкциями")

МХ: я также думаю что можно попробовать проанализировать только те стимулы где у ВСЕХ языков есть эквиваленты

Всего стимулов 130, но некоторые из них не попали в датасет: 112, 114, 122, 125, 128

Code
# Stimulus numbers that have rows for exactly seven distinct languages.
# NOTE(review): rows are counted before the "no equivalent" / NA filters
# used by the later pipelines, so a stimulus may be kept even if one
# language only has a "no equivalent" row — confirm this is intended.
to_keep <- df |>
  distinct(number, language) |>
  count(number) |>
  filter(n == 7) |>
  pull(number)

Вот список стимулов, для которых есть данные всех семи языков (всего таких случаев 83):

Code
# Show the retained stimulus numbers.
print(to_keep)
 [1]   1   2   3   4   6   7   8   9  11  12  14  16  17  18  19  22  23  25  26
[20]  27  28  29  31  32  33  34  35  36  38  39  40  41  43  44  46  49  50  51
[39]  53  54  55  57  58  60  63  66  67  68  69  70  71  72  74  75  76  77  78
[58]  79  80  81  83  85  89  90  91  93  94  96  98  99 100 101 102 103 105 106
[77] 108 109 110 117 118 119 121

Или наоборот, вот номера, для которых не во всех языках есть данные:

Code
# Stimulus numbers that have rows for fewer than seven distinct languages.
df |>
  distinct(number, language) |>
  count(number) |>
  filter(n < 7) |>
  pull(number)
 [1]   5  10  13  15  20  21  24  30  37  42  45  47  48  52  56  59  61  62  64
[20]  65  73  82  84  86  87  88  92  95  97 104 107 111 113 115 116 120 123 124
[39] 126 127 129 130

Запустим кластеризацию:

Code
# Binary distance between languages, restricted to the `to_keep` stimuli;
# a cell counts as present (1) when it is not NA.
stimuli_sample_all_langs <- df_wide |>
  filter(number %in% to_keep) |>
  mutate(across(Guro:Bamana, \(cell) ifelse(is.na(cell), 0, 1))) |>
  select(Guro:Bamana) |>
  t() |>
  dist(method = "binary")

# Dendrogram with tips coloured via the `colors` lookup table.
stimuli_sample_all_langs |>
  hclust() |>
  as.phylo() |>
  (\(tree) plot(tree,
                main = "Clusterization with common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# NeighborNet for the stimuli covered by every language.
stimuli_sample_all_langs  |> 
  neighborNet() |> 
  plot()
# `to_keep` selects stimuli attested in 7 languages, so the title says 7
# (it previously said 6).
title(main = "Выборка стимулов со всеми 7 языками")

Code
# Number of `to_keep` stimuli attested in both languages of every pair.
df |> 
  filter(number %in% to_keep,
         !str_detect(construction, "no equivalent"),
         !is.na(construction_type)) |> 
  arrange(language)  |> 
  distinct(language, number, construction_type)  |> 
  pairwise_count(language, number) |> 
  rename(total = n) ->
  df_pairwise_total

# Per stimulus, find the language pairs that share a construction type;
# each such stimulus counts once, then the counts are summed per pair.
df |> 
  filter(number %in% to_keep,
         !str_detect(construction, "no equivalent"),
         !is.na(construction_type)) |> 
  group_by(number) |> 
  arrange(language) |> 
  pairwise_count(language, construction_type)  |> 
  mutate(n = 1) |> 
  group_by(item1, item2) |> 
  reframe(n = sum(n)) -> 
  df_pairwise_within_construction

# Heatmap of the pairwise agreement percentages.
# `to_keep` selects stimuli attested in 7 languages, so the title says 7
# (it previously said 6).
df_pairwise_total |> 
  left_join(df_pairwise_within_construction) |>  
  mutate(percentage = round(n/total*100, 2),
         item1 = case_match(item1, 
                            "Dan_Gweetaa" ~ "Dan Gweetaa",
                            .default = item1),
         item2 = case_match(item2, 
                            "Dan_Gweetaa" ~ "Dan Gweetaa",
                            .default = item2)) |> 
  select(item1, item2, percentage) |> 
  ggplot(aes(item1, item2))+
  geom_tile(aes(fill = percentage), colour = "white") +
  geom_text(aes(label = str_c(percentage, "%")), colour = "white") +
  scale_fill_gradient(low = "lightblue", high = "navy")+
  coord_fixed()+
  labs(x = "", y = "", title = "Выборка стимулов со всеми 7 языками") +
  theme(legend.position = "bottom")

Интересно, что выборка со случайным эквивалентом и выборка только тех стимулов, где есть данные по всем семи языкам, дают такие разные результаты. Что будет, если взять выборку данных по всем языкам и В НЕЙ провести случайную выборку эквивалентной конструкции?

Code
# Among the `to_keep` stimuli, keep one randomly chosen row per
# (stimulus, language), encode presence of each (stimulus, construction
# type) per language as 0/1 and compute the binary distance.
# No seed is set in this chunk, so the result may differ between runs.
stimuli_sample_all_langs_and_random <- df |>
  filter(number %in% to_keep,
         !is.na(construction_type),
         str_detect(construction, "no equivalent", negate = TRUE)) |>
  group_by(number, language) |>
  slice_sample(n = 1) |>
  ungroup() |>
  select(number, language, construction_type) |>
  mutate(construction_type2 = 1) |>
  pivot_wider(names_from = language, values_from = construction_type2, values_fill = 0) |>
  select(Bamana:Mano) |>
  t() |>
  dist(method = "binary")

# Dendrogram with tips coloured via the `colors` lookup table.
stimuli_sample_all_langs_and_random |>
  hclust() |>
  as.phylo() |>
  (\(tree) plot(tree,
                main = "clusterization with random equivalents and common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# NeighborNet for the random-equivalent sample over the common stimuli.
stimuli_sample_all_langs_and_random  |> 
  neighborNet() |> 
  plot()
# `to_keep` selects stimuli attested in 7 languages, so the title says 7
# (it previously said 6).
title(main = "Выборка стимулов со всеми 7 языками  (случайный эквивалент)")

Перед тем, как приступить к перекодированию, которое предложила Маша К., посмотрим статистику конструкций по языкам:

Code
# Frequency of every construction type per language, with an overall total,
# one row per construction type, most frequent first.
df |>
  count(language, construction_type) |>
  group_by(construction_type) |>
  mutate(overall = sum(n)) |>
  pivot_wider(names_from = language, values_from = n, values_fill = 0) |>
  arrange(-overall) |>
  select(construction_type, overall, Guro, Dan_Gweetaa, Mano, Kpelle, Kono, Looma, Bamana)
construction_type overall Guro Dan_Gweetaa Mano Kpelle Kono Looma Bamana
X Aux Y V 296 43 37 45 36 32 42 61
X Aux V Y Ad 180 23 22 36 23 20 21 35
X Aux Y N V 100 5 11 35 21 12 9 7
X Aux N V Y Ad 68 12 15 10 6 7 12 6
X Aux V loc 37 6 5 8 7 5 6 0
Xinal N Aux V Y Ad 36 8 3 13 5 2 4 1
X Aux Xrefl N V Y Ad 33 4 2 11 7 5 0 4
Y be X Ad 19 2 1 4 4 4 2 2
Yinal N be X Ad 18 0 2 4 5 4 2 1
Xinal N be Y Ad 16 1 1 5 3 2 3 1
Y Aux V X Ad 16 3 2 2 1 2 2 4
X be Y Ad 15 1 1 2 3 4 1 3
Yinal N Aux V X Ad 13 0 2 7 3 1 0 0
X+Y Aux N V 12 0 0 5 5 2 0 0
Yinal N Aux X V 10 0 7 1 1 1 0 0
X+Y Aux V 9 1 0 0 3 1 1 3
X Aux Xrefl V Y Ad 8 1 1 2 1 1 1 1
Y Aux X N V 8 1 2 2 2 0 0 1
X Aux N V 8 0 1 2 2 2 1 0
Y Aux X V 6 0 1 0 0 0 0 5
Xinal Y Aux V 4 1 1 1 0 0 1 0
Xinal Y Aux Xrefl V 4 0 0 1 2 1 0 0
X+Y be N Ad 3 0 0 0 1 1 0 1
X Aux 3sg V Y Ad 3 1 1 1 0 0 0 0
Yinal N be.neg X Ad 3 0 0 1 1 1 0 0
X+Y Aux V X+Yrefl Ad 3 0 0 3 0 0 0 0
X Aux V Y N Ad 2 0 0 0 0 0 0 2
X V Aux Y Ad 2 0 0 0 0 0 0 2
X be X Ad 2 0 0 0 0 0 0 2
X be Y N Ad 2 0 1 0 0 0 0 1
Xinal Y Aux X V 2 0 1 0 0 0 0 1
Y Aux Y V 2 0 0 0 0 0 0 2
X Aux Xrefl N V Y N Ad 2 0 2 0 0 0 0 0
X be Y 2 1 1 0 0 0 0 0
Z Aux X V Y 2 1 1 0 0 0 0 0
Xinal N Aux N V Y Ad 2 2 0 0 0 0 0 0
X be.neg Y Ad 2 0 0 1 1 0 0 0
N adj be X Ad Vinf Y Ad 1 0 0 0 0 0 0 1
X Aux V Aux V Y Ad 1 0 0 0 0 0 0 1
X Aux Xrefl int V Y Ad 1 0 0 0 0 0 0 1
X Aux Y Ad 1 0 0 0 0 0 0 1
X Aux Y V Refl Ad 1 0 0 0 0 0 0 1
X Aux Z V Y Ad 1 0 0 0 0 0 0 1
X N V Y 1 0 0 0 0 0 0 1
Xinal N Aux Y Ad 1 0 0 0 0 0 0 1
Y Aux V Aux V X Ad 1 0 0 0 0 0 0 1
Y Aux Xal N V 1 0 0 0 0 0 0 1
Yinal N Aux X Ad 1 0 0 0 0 0 0 1
N be X Y Ad 1 0 1 0 0 0 0 0
X Aux 3sg N V Y Ad 1 0 1 0 0 0 0 0
X Aux 3sg Z V Y Ad 1 0 1 0 0 0 0 0
X Aux Yal N V 1 0 1 0 0 0 0 0
X Aux.neg V Y Ad 1 0 1 0 0 0 0 0
Xinal Y N Aux V 1 0 1 0 0 0 0 0
Y Aux V X N Ad N Ad 1 0 1 0 0 0 0 0
Yinal N be X N Ad 1 0 1 0 0 0 0 0
Z Aux V X Y loc 1 0 1 0 0 0 0 0
Z Aux X Y V 1 0 1 0 0 0 0 0
3sg Aux V Y Ad 1 1 0 0 0 0 0 0
Y Aux X N V Xrefl Ad 1 1 0 0 0 0 0 0
Yal X Aux V 1 1 0 0 0 0 0 0
X Aux Yinal N V 1 0 0 0 0 1 0 0
X Aux.Neg V Y Ad 1 0 0 0 0 1 0 0
X N Aux.Neg V Y Ad 1 0 0 0 0 1 0 0
X+Y Aux V Z Ad 1 0 0 0 0 1 0 0
Y N be.neg X Ad 1 0 0 0 0 1 0 0
X+Y Aux X+Yrefl N V 3sg Refl Ad 1 0 0 0 1 0 0 0
X+Y Aux X+Yrefl recp N V 1 0 0 0 1 0 0 0
X Aux N V Y N Ad 1 0 0 0 0 0 1 0
X N Aux V Y Ad 1 0 0 0 0 0 1 0
X int N Aux V Y Ad 1 0 0 0 0 0 1 0
X+Y Aux Ad N Num 1 0 0 0 0 0 1 0
X+Y Aux N V Z Ad 1 0 0 0 0 0 1 0
X+Y Aux V int Ad 1 0 0 0 0 0 1 0
Xal Y be.neg V 1 0 0 0 0 0 1 0
3sg N Vger be.neg Y Ad X Ad 1 0 0 1 0 0 0 0
X Aux Y V adj 1 0 0 1 0 0 0 0
X be adj Y Ad 1 0 0 1 0 0 0 0
X+Y Aux V X+Yrefl recp Ad 1 0 0 1 0 0 0 0
X+Y Aux V adv X+Yrefl recp Ad 1 0 0 1 0 0 0 0
X+Y Aux X+Yrefl N V 1 0 0 1 0 0 0 0
X+Y N Aux V 1 0 0 1 0 0 0 0
Xinal N Aux V Xpron Ad Y Ad 1 0 0 1 0 0 0 0
Xinal N be 1 0 0 1 0 0 0 0
Z Aux 3sg V X Ad Y 1 0 0 1 0 0 0 0

Маша предложила классификацию, которую я попробую воспроизвести.

  1. простая переходная вида X Aux Y V (только этот случай)
  2. простая непереходная X Aux V Y Ad, X Aux V loc, X+Y Aux V, Y Aux V X Ad (по идее исчерпывающий список, а что я забыла?)
  3. конструкция с копулой - можно взять автоматически все случаи, где в формуле есть копула be: Y be X Ad, X be Y Ad, X be adj Y Ad, N be Xgen Y Ad, X+Y be N Ad и т.д.
  4. некопульная конструкция со сложным глаголом - взять автоматически все случаи, где в формуле выполняется два условия: есть Aux (=нет be) и есть хотя бы одно N (вместе переходные, и непереходные) - X Aux N V Y Ad; X Aux Y N V; Xgen N Aux V Y Ad и т.д.

По идее это не исчерпывающий список, останется что-то на периферии вроде X+Y Aux V X+Ypron recp Ad, но это редкие штуки.

Code
# Count every construction type per language, attach a coarse class
# (transitive / intransitive / copula / complex verb / other) and print a wide
# table with one column per language, most frequent construction types first.
df |> 
  count(language, construction_type) |>
  # case_when() takes the first matching branch, so the branch order matters.
  # "N" and "be" are matched as whole tokens (\\b...\\b): a bare "N" pattern
  # also matches the "N" of "Aux.Neg", which put "X Aux.Neg V Y Ad" among the
  # complex verbs although it contains no noun.
  mutate(type = case_when(construction_type == "X Aux Y V" ~ "transitive",
                          str_detect(construction_type, "Aux") &
                            !str_detect(construction_type, "\\bN\\b") ~ "intransitive",
                          str_detect(construction_type, "\\bbe\\b") ~ "copula",
                          str_detect(construction_type, "Aux") &
                            str_detect(construction_type, "\\bN\\b") ~ "complex verb",
                          TRUE ~ "other")) |> 
  # Total frequency of the construction type over all languages.
  group_by(construction_type) |> 
  mutate(overall = sum(n)) |>  
  # Languages become columns; a missing language/construction pair counts as 0.
  pivot_wider(names_from = language, values_from = n, values_fill = 0) |> 
  arrange(desc(overall)) |> 
  select(construction_type, type,   overall, Guro, Dan_Gweetaa, Mano, Kpelle, Kono, Looma, Bamana)
construction_type type overall Guro Dan_Gweetaa Mano Kpelle Kono Looma Bamana
X Aux Y V transitive 296 43 37 45 36 32 42 61
X Aux V Y Ad intransitive 180 23 22 36 23 20 21 35
X Aux Y N V complex verb 100 5 11 35 21 12 9 7
X Aux N V Y Ad complex verb 68 12 15 10 6 7 12 6
X Aux V loc intransitive 37 6 5 8 7 5 6 0
Xinal N Aux V Y Ad complex verb 36 8 3 13 5 2 4 1
X Aux Xrefl N V Y Ad complex verb 33 4 2 11 7 5 0 4
Y be X Ad copula 19 2 1 4 4 4 2 2
Yinal N be X Ad copula 18 0 2 4 5 4 2 1
Xinal N be Y Ad copula 16 1 1 5 3 2 3 1
Y Aux V X Ad intransitive 16 3 2 2 1 2 2 4
X be Y Ad copula 15 1 1 2 3 4 1 3
Yinal N Aux V X Ad complex verb 13 0 2 7 3 1 0 0
X+Y Aux N V complex verb 12 0 0 5 5 2 0 0
Yinal N Aux X V complex verb 10 0 7 1 1 1 0 0
X+Y Aux V intransitive 9 1 0 0 3 1 1 3
X Aux Xrefl V Y Ad intransitive 8 1 1 2 1 1 1 1
Y Aux X N V complex verb 8 1 2 2 2 0 0 1
X Aux N V complex verb 8 0 1 2 2 2 1 0
Y Aux X V intransitive 6 0 1 0 0 0 0 5
Xinal Y Aux V intransitive 4 1 1 1 0 0 1 0
Xinal Y Aux Xrefl V intransitive 4 0 0 1 2 1 0 0
X+Y be N Ad copula 3 0 0 0 1 1 0 1
X Aux 3sg V Y Ad intransitive 3 1 1 1 0 0 0 0
Yinal N be.neg X Ad copula 3 0 0 1 1 1 0 0
X+Y Aux V X+Yrefl Ad intransitive 3 0 0 3 0 0 0 0
X Aux V Y N Ad complex verb 2 0 0 0 0 0 0 2
X V Aux Y Ad intransitive 2 0 0 0 0 0 0 2
X be X Ad copula 2 0 0 0 0 0 0 2
X be Y N Ad copula 2 0 1 0 0 0 0 1
Xinal Y Aux X V intransitive 2 0 1 0 0 0 0 1
Y Aux Y V intransitive 2 0 0 0 0 0 0 2
X Aux Xrefl N V Y N Ad complex verb 2 0 2 0 0 0 0 0
X be Y copula 2 1 1 0 0 0 0 0
Z Aux X V Y intransitive 2 1 1 0 0 0 0 0
Xinal N Aux N V Y Ad complex verb 2 2 0 0 0 0 0 0
X be.neg Y Ad copula 2 0 0 1 1 0 0 0
N adj be X Ad Vinf Y Ad copula 1 0 0 0 0 0 0 1
X Aux V Aux V Y Ad intransitive 1 0 0 0 0 0 0 1
X Aux Xrefl int V Y Ad intransitive 1 0 0 0 0 0 0 1
X Aux Y Ad intransitive 1 0 0 0 0 0 0 1
X Aux Y V Refl Ad intransitive 1 0 0 0 0 0 0 1
X Aux Z V Y Ad intransitive 1 0 0 0 0 0 0 1
X N V Y other 1 0 0 0 0 0 0 1
Xinal N Aux Y Ad complex verb 1 0 0 0 0 0 0 1
Y Aux V Aux V X Ad intransitive 1 0 0 0 0 0 0 1
Y Aux Xal N V complex verb 1 0 0 0 0 0 0 1
Yinal N Aux X Ad complex verb 1 0 0 0 0 0 0 1
N be X Y Ad copula 1 0 1 0 0 0 0 0
X Aux 3sg N V Y Ad complex verb 1 0 1 0 0 0 0 0
X Aux 3sg Z V Y Ad intransitive 1 0 1 0 0 0 0 0
X Aux Yal N V complex verb 1 0 1 0 0 0 0 0
X Aux.neg V Y Ad intransitive 1 0 1 0 0 0 0 0
Xinal Y N Aux V complex verb 1 0 1 0 0 0 0 0
Y Aux V X N Ad N Ad complex verb 1 0 1 0 0 0 0 0
Yinal N be X N Ad copula 1 0 1 0 0 0 0 0
Z Aux V X Y loc intransitive 1 0 1 0 0 0 0 0
Z Aux X Y V intransitive 1 0 1 0 0 0 0 0
3sg Aux V Y Ad intransitive 1 1 0 0 0 0 0 0
Y Aux X N V Xrefl Ad complex verb 1 1 0 0 0 0 0 0
Yal X Aux V intransitive 1 1 0 0 0 0 0 0
X Aux Yinal N V complex verb 1 0 0 0 0 1 0 0
X Aux.Neg V Y Ad complex verb 1 0 0 0 0 1 0 0
X N Aux.Neg V Y Ad complex verb 1 0 0 0 0 1 0 0
X+Y Aux V Z Ad intransitive 1 0 0 0 0 1 0 0
Y N be.neg X Ad copula 1 0 0 0 0 1 0 0
X+Y Aux X+Yrefl N V 3sg Refl Ad complex verb 1 0 0 0 1 0 0 0
X+Y Aux X+Yrefl recp N V complex verb 1 0 0 0 1 0 0 0
X Aux N V Y N Ad complex verb 1 0 0 0 0 0 1 0
X N Aux V Y Ad complex verb 1 0 0 0 0 0 1 0
X int N Aux V Y Ad complex verb 1 0 0 0 0 0 1 0
X+Y Aux Ad N Num complex verb 1 0 0 0 0 0 1 0
X+Y Aux N V Z Ad complex verb 1 0 0 0 0 0 1 0
X+Y Aux V int Ad intransitive 1 0 0 0 0 0 1 0
Xal Y be.neg V copula 1 0 0 0 0 0 1 0
3sg N Vger be.neg Y Ad X Ad copula 1 0 0 1 0 0 0 0
X Aux Y V adj intransitive 1 0 0 1 0 0 0 0
X be adj Y Ad copula 1 0 0 1 0 0 0 0
X+Y Aux V X+Yrefl recp Ad intransitive 1 0 0 1 0 0 0 0
X+Y Aux V adv X+Yrefl recp Ad intransitive 1 0 0 1 0 0 0 0
X+Y Aux X+Yrefl N V complex verb 1 0 0 1 0 0 0 0
X+Y N Aux V complex verb 1 0 0 1 0 0 0 0
Xinal N Aux V Xpron Ad Y Ad complex verb 1 0 0 1 0 0 0 0
Xinal N be copula 1 0 0 1 0 0 0 0
Z Aux 3sg V X Ad Y intransitive 1 0 0 1 0 0 0 0

Вот, что получилось. Маш, что исправить? Вот саммари:

Code
# Summary: number of observations of each coarse construction class per
# language, one column per language.
df |> 
  # case_when() takes the first matching branch, so the branch order matters.
  # "N" and "be" are matched as whole tokens (\\b...\\b): a bare "N" pattern
  # also matches the "N" of "Aux.Neg", which counted "X Aux.Neg V Y Ad" as a
  # complex verb although it contains no noun.
  mutate(type = case_when(construction_type == "X Aux Y V" ~ "transitive",
                          str_detect(construction_type, "Aux") &
                            !str_detect(construction_type, "\\bN\\b") ~ "intransitive",
                          str_detect(construction_type, "\\bbe\\b") ~ "copula",
                          str_detect(construction_type, "Aux") &
                            str_detect(construction_type, "\\bN\\b") ~ "complex verb",
                          TRUE ~ "other")) |> 
  count(type, language) |> 
  # Languages become columns; a class absent from a language counts as 0.
  pivot_wider(names_from = language, values_from = n, values_fill = 0)  |> 
  # Rows are ordered by the Looma counts, largest first.
  arrange(-Looma) |> 
  select(type, Guro, Dan_Gweetaa, Mano, Kpelle, Kono, Looma, Bamana)
type Guro Dan_Gweetaa Mano Kpelle Kono Looma Bamana
transitive 43 37 45 36 32 42 61
intransitive 39 39 58 37 31 33 59
complex verb 33 49 89 54 35 31 24
copula 5 9 20 18 17 9 12
other 0 0 0 0 0 0 1

Во-вторых, есть отдельная просьба про распределение конструкций и глаголов. В первом листе stimuli_general в столбце B есть семантическая разметка глаголов. Добавь её, пожалуйста, в глагольную таблицу, которую ты будешь снова генерировать. А дальше я бы попросила сделать следующее. Посчитать для каждого глагола, сколько ему в наших языках соответствует разных конструкций из 4 типов, выделенных выше. А потом слить глаголы из одной семантической группы и посчитать для семантических групп вместе статистику по конструкциям. Грубо говоря я хочу проверить, верно ли, что у глаголов вида effect будет больше переходных конструкций, а у feeling больше сложных глаголов, т.к. более абстрактные значения языки склонны выражать метафорически.

Code
# Bar charts of the coarse construction classes, one panel per
# semantic verb class (rows) and language (columns).
df |> 
  # Adds the stimulus annotation (semantic_type is used below); the join keys
  # are the columns shared by both tables.
  left_join(stimuli) |> 
  # case_when() takes the first matching branch, so the branch order matters.
  # "N" and "be" are matched as whole tokens (\\b...\\b): a bare "N" pattern
  # also matches the "N" of "Aux.Neg", which counted "X Aux.Neg V Y Ad" as a
  # complex verb although it contains no noun.
  mutate(type = case_when(construction_type == "X Aux Y V" ~ "transitive",
                          str_detect(construction_type, "Aux") &
                            !str_detect(construction_type, "\\bN\\b") ~ "intransitive",
                          str_detect(construction_type, "\\bbe\\b") ~ "copula",
                          str_detect(construction_type, "Aux") &
                            str_detect(construction_type, "\\bN\\b") ~ "complex verb",
                          TRUE ~ "other")) |> 
  count(type, language, semantic_type) |> 
  # "Dan_Gweetaa" -> "Dan Gweetaa"; the factor levels fix the panel order.
  mutate(language = str_replace(language, "_", " "),
         language = factor(language, levels = c("Guro", "Dan Gweetaa", "Mano", "Kpelle", "Kono", "Looma", "Bamana"))) |> 
  ggplot(aes(n, type))+
  geom_col()+
  facet_grid(semantic_type~language, scales = "free")

Ну вот примерно то, что ты имела в виду, наверное, но это всё нужно пересчитать после того, как мы утвердим тип каждой из конструкций.

Какие адлоги есть для каждого языка?

Code
# Inventory of adpositions per language: a cell may list several adpositions
# separated by ", ", so it is split into one row per adposition before counting.
df |>
  mutate(adposition = str_split(adposition, pattern = ", ")) |>
  unnest_longer(col = adposition) |>
  count(language, adposition) |>
  # Observations without an adposition end up as an NA row; drop it.
  drop_na()
language adposition n
Bamana bólo 5
Bamana fɛ̀ 7
Bamana kàn 5
Bamana kɔ́ 4
Bamana kɔ́nɔ 1
Bamana kɔ́rɔ 1
Bamana lá 29
Bamana mà 5
Bamana nɔ̀fɛ̀ 2
Bamana yé 6
Bamana ɲɛ́ 1
Dan_Gweetaa bhàa 2
Dan_Gweetaa bhȁ 23
Dan_Gweetaa dhi̋ɤ 3
Dan_Gweetaa dhɛ̏ 2
Dan_Gweetaa gɔ̏ 5
Dan_Gweetaa gɯ́ 5
Dan_Gweetaa ká 14
Dan_Gweetaa kèŋ̏ 1
Dan_Gweetaa loc 6
Dan_Gweetaa pi̋ɤ 2
Dan_Gweetaa tȁ 4
Dan_Gweetaa zɯ̏ 1
Guro jì 2
Guro la 2
Guro leè 3
Guro loc 6
Guro lɛ 8
Guro ta 4
Guro và 8
Guro ya̰ 9
Guro zì 2
Guro zuo 2
Guro ɓa̰ 20
Kono à 11
Kono hù 5
Kono lá 1
Kono loc 5
Kono mà 28
Kono mɛ̌i 1
Kono pòò 1
Kono pɔ̀ 4
Kono yêi 4
Kpelle à 11
Kpelle hù 3
Kpelle lá 1
Kpelle loc 7
Kpelle púlû 4
Kpelle pɔ́ 3
Kpelle yêi 4
Kpelle ɓà 33
Kpelle ɲá 1
Looma bà 7
Looma bù 1
Looma bɛ̀ 5
Looma gà 10
Looma kómá 1
Looma loc 6
Looma mà 19
Looma pòlù 2
Looma sù 3
Looma tá 2
Looma yà 3
Mano bà 5
Mano gé 1
Mano gɛ̀nɛ̀ 1
Mano ká 24
Mano kɛ̀lɛ̀ 6
Mano là 7
Mano loc 8
Mano lɛ̀ɛ̄ 6
Mano mɔ̀ 45
Mano píé 7
Mano yí 1
Mano ŋwɛ́ŋ̀ 4

Какие были раньше проблемы:

  • в коно нет адлогов
  • в дан гвета есть адлоги dhi̋ɤ и dhiɤ̋, bhȁ и bhàa, которые, наверное, про одно и то же. Кроме того есть адлог loc.
  • в гуро есть адлоги léè и leè, lɛ̄ и lɛ, ɓa̰ и ɓā̰. Кроме того есть адлог loc.
  • в лома есть адлоги mà и mà̀ (с двумя знаками тона). Кроме того есть адлог loc.
  • в мано есть адлоги píé и píé (я не понимаю в чем разница…).

Проблемы на 15.02.2023:

  • В гуро ɓa̰ и ɓā̰ — разное?
  • В гуро ya̰ и yā̰ — разное?

Я не совсем понял, что написала МХ, так что я пока посчитал вот такое вот. Какие адлоги из разных языков встречаются друг с другом в одной расширенной конструкции внутри одного стимула?

Code
# For every pair of language-tagged adpositions, count how often the two occur
# with the same construction type within one stimulus, summed over stimuli.
df |>
  # Keep only observations that have an adposition, a construction type and
  # an actual equivalent.
  filter(!is.na(adposition),
         !is.na(construction_type),
         !str_detect(construction, "no equivalent")) |>
  # Tag each adposition with its language, e.g. "Mano__ká".
  mutate(adposition = str_c(language, adposition, sep = "__")) |>
  # Pairs are counted separately inside every stimulus ...
  group_by(number) |>
  pairwise_count(adposition, construction_type) |>
  # ... and then added up across stimuli.
  group_by(item1, item2) |>
  summarise(n = sum(n), .groups = "drop") |>
  arrange(-n)
item1 item2 n
Kpelle__ɓà Mano__mɔ̀ 22
Mano__mɔ̀ Kpelle__ɓà 22
Kono__mà Kpelle__ɓà 16
Kpelle__ɓà Kono__mà 16
Kono__mà Mano__mɔ̀ 15
Mano__mɔ̀ Kono__mà 15
Kono__à Kpelle__à 11
Kpelle__à Kono__à 11
Dan_Gweetaa__bhȁ Kpelle__ɓà 7
Dan_Gweetaa__bhȁ Mano__mɔ̀ 7
Kono__à Mano__ká 7
Kpelle__à Mano__ká 7
Kpelle__ɓà Dan_Gweetaa__bhȁ 7
Mano__ká Kono__à 7
Mano__ká Kpelle__à 7
Mano__mɔ̀ Dan_Gweetaa__bhȁ 7
Bamana__lá Kpelle__ɓà 6
Guro__ɓa̰ Kpelle__ɓà 6
Kpelle__ɓà Bamana__lá 6
Kpelle__ɓà Guro__ɓa̰ 6
Bamana__lá Mano__mɔ̀ 5
Dan_Gweetaa__loc Guro__loc 5
Dan_Gweetaa__loc Kono__loc 5
Dan_Gweetaa__loc Kpelle__loc 5
Dan_Gweetaa__loc Looma__loc 5
Dan_Gweetaa__loc Mano__loc 5
Guro__loc Dan_Gweetaa__loc 5
Guro__loc Kono__loc 5
Guro__loc Kpelle__loc 5
Guro__loc Looma__loc 5
Guro__loc Mano__loc 5
Guro__ɓa̰ Kono__mà 5
Guro__ɓa̰ Mano__mɔ̀ 5
Kono__loc Dan_Gweetaa__loc 5
Kono__loc Guro__loc 5
Kono__loc Kpelle__loc 5
Kono__loc Looma__loc 5
Kono__loc Mano__loc 5
Kono__mà Guro__ɓa̰ 5
Kono__mà Looma__mà 5
Kpelle__loc Dan_Gweetaa__loc 5
Kpelle__loc Guro__loc 5
Kpelle__loc Kono__loc 5
Kpelle__loc Looma__loc 5
Kpelle__loc Mano__loc 5
Kpelle__ɓà Looma__mà 5
Looma__loc Dan_Gweetaa__loc 5
Looma__loc Guro__loc 5
Looma__loc Kono__loc 5
Looma__loc Kpelle__loc 5
Looma__loc Mano__loc 5
Looma__mà Kono__mà 5
Looma__mà Kpelle__ɓà 5
Looma__mà Mano__mɔ̀ 5
Mano__loc Dan_Gweetaa__loc 5
Mano__loc Guro__loc 5
Mano__loc Kono__loc 5
Mano__loc Kpelle__loc 5
Mano__loc Looma__loc 5
Mano__mɔ̀ Bamana__lá 5
Mano__mɔ̀ Guro__ɓa̰ 5
Mano__mɔ̀ Looma__mà 5
Bamana__lá Guro__ɓa̰ 4
Dan_Gweetaa__bhȁ Kono__mà 4
Dan_Gweetaa__ká Guro__ɓa̰ 4
Guro__ɓa̰ Bamana__lá 4
Guro__ɓa̰ Dan_Gweetaa__ká 4
Kono__mà Dan_Gweetaa__bhȁ 4
Kono__yêi Kpelle__yêi 4
Kpelle__yêi Kono__yêi 4
Bamana__lá Dan_Gweetaa__bhȁ 3
Bamana__lá Dan_Gweetaa__ká 3
Bamana__lá Kono__mà 3
Bamana__lá Looma__mà 3
Dan_Gweetaa__bhȁ Bamana__lá 3
Dan_Gweetaa__ká Bamana__lá 3
Dan_Gweetaa__ká Guro__ya̰ 3
Guro__lɛ Kpelle__ɓà 3
Guro__và Kpelle__ɓà 3
Guro__và Looma__mà 3
Guro__và Mano__mɔ̀ 3
Guro__ya̰ Dan_Gweetaa__ká 3
Guro__ya̰ Kono__à 3
Guro__ya̰ Kpelle__à 3
Kono__à Guro__ya̰ 3
Kono__mà Bamana__lá 3
Kono__mà Looma__bà 3
Kono__yêi Mano__kɛ̀lɛ̀ 3
Kpelle__à Guro__ya̰ 3
Kpelle__yêi Mano__kɛ̀lɛ̀ 3
Kpelle__ɓà Guro__lɛ 3
Kpelle__ɓà Guro__và 3
Kpelle__ɓà Looma__bà 3
Kpelle__ɓà Mano__ká 3
Looma__bà Kono__mà 3
Looma__bà Kpelle__ɓà 3
Looma__bà Mano__mɔ̀ 3
Looma__mà Bamana__lá 3
Looma__mà Guro__và 3
Mano__ká Kpelle__ɓà 3
Mano__kɛ̀lɛ̀ Kono__yêi 3
Mano__kɛ̀lɛ̀ Kpelle__yêi 3
Mano__mɔ̀ Guro__và 3
Mano__mɔ̀ Looma__bà 3
Bamana__bólo Kono__yêi 2
Bamana__bólo Kpelle__yêi 2
Bamana__fɛ̀ Mano__píé 2
Bamana__kɔ́ Guro__zuo 2
Bamana__kɔ́ Kpelle__púlû 2
Bamana__lá Guro__và 2
Bamana__lá Kono__hù 2
Bamana__lá Kpelle__hù 2
Bamana__lá Looma__bà 2
Bamana__lá Mano__ká 2
Bamana__yé Guro__lɛ 2
Bamana__yé Looma__bɛ̀ 2
Dan_Gweetaa__bhȁ Guro__ta 2
Dan_Gweetaa__bhȁ Guro__và 2
Dan_Gweetaa__gɔ̏ Guro__leè 2
Dan_Gweetaa__gɯ́ Kono__mà 2
Dan_Gweetaa__gɯ́ Kpelle__ɓà 2
Dan_Gweetaa__gɯ́ Looma__mà 2
Dan_Gweetaa__gɯ́ Mano__mɔ̀ 2
Dan_Gweetaa__ká Guro__và 2
Dan_Gweetaa__ká Kono__à 2
Dan_Gweetaa__ká Kono__mà 2
Dan_Gweetaa__ká Kpelle__à 2
Dan_Gweetaa__ká Mano__ká 2
Dan_Gweetaa__ká Mano__mɔ̀ 2
Dan_Gweetaa__tȁ Guro__ta 2
Guro__leè Dan_Gweetaa__gɔ̏ 2
Guro__lɛ Bamana__yé 2
Guro__lɛ Mano__lɛ̀ɛ̄ 2
Guro__lɛ Mano__mɔ̀ 2
Guro__ta Dan_Gweetaa__bhȁ 2
Guro__ta Dan_Gweetaa__tȁ 2
Guro__ta Kono__mà 2
Guro__ta Kpelle__ɓà 2
Guro__ta Mano__mɔ̀ 2
Guro__và Bamana__lá 2
Guro__và Dan_Gweetaa__bhȁ 2
Guro__và Dan_Gweetaa__ká 2
Guro__và Kpelle__pɔ́ 2
Guro__và Mano__píé 2
Guro__ya̰ Mano__ká 2
Guro__zuo Bamana__kɔ́ 2
Guro__zuo Kpelle__púlû 2
Guro__ɓa̰ Looma__gà 2
Guro__ɓa̰ Looma__mà 2
Kono__à Dan_Gweetaa__ká 2
Kono__à Looma__gà 2
Kono__hù Bamana__lá 2
Kono__hù Kpelle__hù 2
Kono__mà Dan_Gweetaa__gɯ́ 2
Kono__mà Dan_Gweetaa__ká 2
Kono__mà Guro__ta 2
Kono__mà Mano__ká 2
Kono__yêi Bamana__bólo 2
Kono__yêi Looma__yà 2
Kpelle__à Dan_Gweetaa__ká 2
Kpelle__à Looma__gà 2
Kpelle__hù Bamana__lá 2
Kpelle__hù Kono__hù 2
Kpelle__púlû Bamana__kɔ́ 2
Kpelle__púlû Guro__zuo 2
Kpelle__púlû Mano__píé 2
Kpelle__pɔ́ Guro__và 2
Kpelle__pɔ́ Mano__píé 2
Kpelle__yêi Bamana__bólo 2
Kpelle__yêi Looma__yà 2
Kpelle__ɓà Dan_Gweetaa__gɯ́ 2
Kpelle__ɓà Guro__ta 2
Kpelle__ɓà Looma__gà 2
Looma__bà Bamana__lá 2
Looma__bɛ̀ Bamana__yé 2
Looma__gà Guro__ɓa̰ 2
Looma__gà Kono__à 2
Looma__gà Kpelle__à 2
Looma__gà Kpelle__ɓà 2
Looma__gà Mano__ká 2
Looma__mà Dan_Gweetaa__gɯ́ 2
Looma__mà Guro__ɓa̰ 2
Looma__mà Looma__sù 2
Looma__sù Looma__mà 2
Looma__yà Kono__yêi 2
Looma__yà Kpelle__yêi 2
Looma__yà Mano__kɛ̀lɛ̀ 2
Mano__ká Bamana__lá 2
Mano__ká Dan_Gweetaa__ká 2
Mano__ká Guro__ya̰ 2
Mano__ká Kono__mà 2
Mano__ká Looma__gà 2
Mano__kɛ̀lɛ̀ Looma__yà 2
Mano__lɛ̀ɛ̄ Guro__lɛ 2
Mano__mɔ̀ Dan_Gweetaa__gɯ́ 2
Mano__mɔ̀ Dan_Gweetaa__ká 2
Mano__mɔ̀ Guro__lɛ 2
Mano__mɔ̀ Guro__ta 2
Mano__píé Bamana__fɛ̀ 2
Mano__píé Guro__và 2
Mano__píé Kpelle__púlû 2
Mano__píé Kpelle__pɔ́ 2
Mano__píé Mano__ŋwɛ́ŋ̀ 2
Mano__ŋwɛ́ŋ̀ Mano__píé 2
Bamana__bólo Bamana__fɛ̀ 1
Bamana__bólo Looma__yà 1
Bamana__bólo Mano__kɛ̀lɛ̀ 1
Bamana__fɛ̀ Bamana__bólo 1
Bamana__fɛ̀ Dan_Gweetaa__bhȁ 1
Bamana__fɛ̀ Dan_Gweetaa__pi̋ɤ 1
Bamana__fɛ̀ Guro__và 1
Bamana__fɛ̀ Kono__pɔ̀ 1
Bamana__fɛ̀ Kpelle__pɔ́ 1
Bamana__fɛ̀ Kpelle__ɓà 1
Bamana__fɛ̀ Looma__bà 1
Bamana__fɛ̀ Looma__tá 1
Bamana__fɛ̀ Mano__lɛ̀ɛ̄ 1
Bamana__fɛ̀ Mano__mɔ̀ 1
Bamana__kàn Bamana__kɔ́ 1
Bamana__kàn Bamana__lá 1
Bamana__kàn Bamana__mà 1
Bamana__kàn Bamana__nɔ̀fɛ̀ 1
Bamana__kàn Dan_Gweetaa__kèŋ̏ 1
Bamana__kàn Dan_Gweetaa__tȁ 1
Bamana__kàn Guro__ta 1
Bamana__kàn Guro__zuo 1
Bamana__kàn Kono__mà 1
Bamana__kàn Kpelle__púlû 1
Bamana__kàn Kpelle__ɓà 1
Bamana__kàn Looma__pòlù 1
Bamana__kàn Mano__mɔ̀ 1
Bamana__kɔ́ Bamana__kàn 1
Bamana__kɔ́ Bamana__lá 1
Bamana__kɔ́ Bamana__nɔ̀fɛ̀ 1
Bamana__kɔ́ Dan_Gweetaa__kèŋ̏ 1
Bamana__kɔ́ Kono__mà 1
Bamana__kɔ́ Kono__pòò 1
Bamana__kɔ́ Kpelle__ɓà 1
Bamana__kɔ́ Looma__mà 1
Bamana__kɔ́ Looma__pòlù 1
Bamana__kɔ́ Mano__mɔ̀ 1
Bamana__kɔ́ Mano__píé 1
Bamana__lá Bamana__kàn 1
Bamana__lá Bamana__kɔ́ 1
Bamana__lá Bamana__nɔ̀fɛ̀ 1
Bamana__lá Dan_Gweetaa__bhàa 1
Bamana__lá Dan_Gweetaa__kèŋ̏ 1
Bamana__lá Guro__lɛ 1
Bamana__lá Guro__ta 1
Bamana__lá Guro__ya̰ 1
Bamana__lá Guro__zuo 1
Bamana__lá Kono__mɛ̌i 1
Bamana__lá Kpelle__púlû 1
Bamana__lá Looma__bù 1
Bamana__lá Looma__gà 1
Bamana__lá Looma__pòlù 1
Bamana__lá Mano__bà 1
Bamana__lá Mano__là 1
Bamana__lá Mano__yí 1
Bamana__mà Bamana__kàn 1
Bamana__mà Dan_Gweetaa__tȁ 1
Bamana__mà Guro__ta 1
Bamana__mà Kono__mà 1
Bamana__mà Kpelle__ɓà 1
Bamana__mà Mano__mɔ̀ 1
Bamana__nɔ̀fɛ̀ Bamana__kàn 1
Bamana__nɔ̀fɛ̀ Bamana__kɔ́ 1
Bamana__nɔ̀fɛ̀ Bamana__lá 1
Bamana__nɔ̀fɛ̀ Dan_Gweetaa__bhȁ 1
Bamana__nɔ̀fɛ̀ Dan_Gweetaa__kèŋ̏ 1
Bamana__nɔ̀fɛ̀ Guro__zuo 1
Bamana__nɔ̀fɛ̀ Kono__mà 1
Bamana__nɔ̀fɛ̀ Kpelle__púlû 1
Bamana__nɔ̀fɛ̀ Kpelle__ɓà 1
Bamana__nɔ̀fɛ̀ Looma__bɛ̀ 1
Bamana__nɔ̀fɛ̀ Looma__pòlù 1
Bamana__nɔ̀fɛ̀ Mano__mɔ̀ 1
Bamana__yé Dan_Gweetaa__dhi̋ɤ 1
Bamana__yé Looma__gà 1
Bamana__yé Mano__lɛ̀ɛ̄ 1
Bamana__ɲɛ́ Dan_Gweetaa__gɔ̏ 1
Bamana__ɲɛ́ Guro__leè 1
Bamana__ɲɛ́ Kono__mà 1
Bamana__ɲɛ́ Kpelle__ɓà 1
Bamana__ɲɛ́ Looma__bà 1
Bamana__ɲɛ́ Mano__ká 1
Bamana__ɲɛ́ Mano__lɛ̀ɛ̄ 1
Dan_Gweetaa__bhàa Bamana__lá 1
Dan_Gweetaa__bhàa Kono__hù 1
Dan_Gweetaa__bhàa Kpelle__hù 1
Dan_Gweetaa__bhàa Looma__bù 1
Dan_Gweetaa__bhàa Mano__bà 1
Dan_Gweetaa__bhȁ Bamana__fɛ̀ 1
Dan_Gweetaa__bhȁ Bamana__nɔ̀fɛ̀ 1
Dan_Gweetaa__bhȁ Dan_Gweetaa__tȁ 1
Dan_Gweetaa__bhȁ Guro__ya̰ 1
Dan_Gweetaa__bhȁ Guro__zì 1
Dan_Gweetaa__bhȁ Guro__ɓa̰ 1
Dan_Gweetaa__bhȁ Kono__à 1
Dan_Gweetaa__bhȁ Kono__hù 1
Dan_Gweetaa__bhȁ Kpelle__à 1
Dan_Gweetaa__bhȁ Looma__bà 1
Dan_Gweetaa__bhȁ Looma__bɛ̀ 1
Dan_Gweetaa__bhȁ Looma__gà 1
Dan_Gweetaa__bhȁ Looma__mà 1
Dan_Gweetaa__bhȁ Looma__sù 1
Dan_Gweetaa__bhȁ Mano__bà 1
Dan_Gweetaa__bhȁ Mano__là 1
Dan_Gweetaa__dhi̋ɤ Bamana__yé 1
Dan_Gweetaa__dhɛ̏ Mano__lɛ̀ɛ̄ 1
Dan_Gweetaa__gɔ̏ Bamana__ɲɛ́ 1
Dan_Gweetaa__gɔ̏ Kono__mà 1
Dan_Gweetaa__gɔ̏ Kono__yêi 1
Dan_Gweetaa__gɔ̏ Kpelle__yêi 1
Dan_Gweetaa__gɔ̏ Kpelle__ɓà 1
Dan_Gweetaa__gɔ̏ Looma__bà 1
Dan_Gweetaa__gɔ̏ Looma__yà 1
Dan_Gweetaa__gɔ̏ Mano__gɛ̀nɛ̀ 1
Dan_Gweetaa__gɔ̏ Mano__ká 1
Dan_Gweetaa__gɔ̏ Mano__kɛ̀lɛ̀ 1
Dan_Gweetaa__gɔ̏ Mano__lɛ̀ɛ̄ 1
Dan_Gweetaa__gɯ́ Guro__và 1
Dan_Gweetaa__gɯ́ Guro__ɓa̰ 1
Dan_Gweetaa__gɯ́ Looma__sù 1
Dan_Gweetaa__gɯ́ Mano__píé 1
Dan_Gweetaa__gɯ́ Mano__ŋwɛ́ŋ̀ 1
Dan_Gweetaa__ká Dan_Gweetaa__tȁ 1
Dan_Gweetaa__ká Looma__bà 1
Dan_Gweetaa__ká Looma__gà 1
Dan_Gweetaa__ká Looma__tá 1
Dan_Gweetaa__kèŋ̏ Bamana__kàn 1
Dan_Gweetaa__kèŋ̏ Bamana__kɔ́ 1
Dan_Gweetaa__kèŋ̏ Bamana__lá 1
Dan_Gweetaa__kèŋ̏ Bamana__nɔ̀fɛ̀ 1
Dan_Gweetaa__kèŋ̏ Guro__zuo 1
Dan_Gweetaa__kèŋ̏ Kpelle__púlû 1
Dan_Gweetaa__kèŋ̏ Looma__pòlù 1
Dan_Gweetaa__pi̋ɤ Bamana__fɛ̀ 1
Dan_Gweetaa__pi̋ɤ Guro__và 1
Dan_Gweetaa__pi̋ɤ Kono__pɔ̀ 1
Dan_Gweetaa__pi̋ɤ Kpelle__pɔ́ 1
Dan_Gweetaa__pi̋ɤ Mano__píé 1
Dan_Gweetaa__tȁ Bamana__kàn 1
Dan_Gweetaa__tȁ Bamana__mà 1
Dan_Gweetaa__tȁ Dan_Gweetaa__bhȁ 1
Dan_Gweetaa__tȁ Dan_Gweetaa__ká 1
Dan_Gweetaa__tȁ Guro__jì 1
Dan_Gweetaa__tȁ Kono__hù 1
Dan_Gweetaa__tȁ Kono__mà 1
Dan_Gweetaa__tȁ Kpelle__ɓà 1
Dan_Gweetaa__tȁ Looma__mà 1
Dan_Gweetaa__tȁ Mano__là 1
Dan_Gweetaa__tȁ Mano__mɔ̀ 1
Dan_Gweetaa__zɯ̏ Guro__zì 1
Dan_Gweetaa__zɯ̏ Kono__mà 1
Dan_Gweetaa__zɯ̏ Looma__mà 1
Dan_Gweetaa__zɯ̏ Mano__mɔ̀ 1
Guro__jì Dan_Gweetaa__tȁ 1
Guro__jì Looma__mà 1
Guro__leè Bamana__ɲɛ́ 1
Guro__leè Kono__mà 1
Guro__leè Kono__yêi 1
Guro__leè Kpelle__yêi 1
Guro__leè Kpelle__ɓà 1
Guro__leè Looma__bà 1
Guro__leè Looma__yà 1
Guro__leè Mano__ká 1
Guro__leè Mano__kɛ̀lɛ̀ 1
Guro__leè Mano__lɛ̀ɛ̄ 1
Guro__lɛ Bamana__lá 1
Guro__lɛ Kono__mà 1
Guro__lɛ Kono__mɛ̌i 1
Guro__lɛ Looma__bɛ̀ 1
Guro__lɛ Mano__là 1
Guro__ta Bamana__kàn 1
Guro__ta Bamana__lá 1
Guro__ta Bamana__mà 1
Guro__ta Kono__hù 1
Guro__ta Looma__bà 1
Guro__ta Mano__là 1
Guro__và Bamana__fɛ̀ 1
Guro__và Dan_Gweetaa__gɯ́ 1
Guro__và Dan_Gweetaa__pi̋ɤ 1
Guro__và Guro__ya̰ 1
Guro__và Guro__ɓa̰ 1
Guro__và Kono__mà 1
Guro__và Kono__pɔ̀ 1
Guro__và Looma__bà 1
Guro__và Mano__ŋwɛ́ŋ̀ 1
Guro__ya̰ Bamana__lá 1
Guro__ya̰ Dan_Gweetaa__bhȁ 1
Guro__ya̰ Guro__và 1
Guro__ya̰ Looma__gà 1
Guro__ya̰ Looma__tá 1
Guro__ya̰ Mano__mɔ̀ 1
Guro__zì Dan_Gweetaa__bhȁ 1
Guro__zì Dan_Gweetaa__zɯ̏ 1
Guro__zì Kono__mà 1
Guro__zì Looma__mà 1
Guro__zì Mano__mɔ̀ 1
Guro__zuo Bamana__kàn 1
Guro__zuo Bamana__lá 1
Guro__zuo Bamana__nɔ̀fɛ̀ 1
Guro__zuo Dan_Gweetaa__kèŋ̏ 1
Guro__zuo Kono__pòò 1
Guro__zuo Looma__pòlù 1
Guro__zuo Mano__píé 1
Guro__ɓa̰ Dan_Gweetaa__bhȁ 1
Guro__ɓa̰ Dan_Gweetaa__gɯ́ 1
Guro__ɓa̰ Guro__và 1
Guro__ɓa̰ Looma__bà 1
Guro__ɓa̰ Looma__kómá 1
Guro__ɓa̰ Mano__bà 1
Guro__ɓa̰ Mano__ká 1
Guro__ɓa̰ Mano__píé 1
Guro__ɓa̰ Mano__ŋwɛ́ŋ̀ 1
Kono__à Dan_Gweetaa__bhȁ 1
Kono__à Looma__tá 1
Kono__à Mano__mɔ̀ 1
Kono__hù Dan_Gweetaa__bhàa 1
Kono__hù Dan_Gweetaa__bhȁ 1
Kono__hù Dan_Gweetaa__tȁ 1
Kono__hù Guro__ta 1
Kono__hù Looma__bù 1
Kono__hù Looma__mà 1
Kono__hù Mano__bà 1
Kono__hù Mano__ká 1
Kono__hù Mano__là 1
Kono__hù Mano__yí 1
Kono__lá Kpelle__lá 1
Kono__lá Looma__yà 1
Kono__lá Mano__là 1
Kono__mà Bamana__kàn 1
Kono__mà Bamana__kɔ́ 1
Kono__mà Bamana__mà 1
Kono__mà Bamana__nɔ̀fɛ̀ 1
Kono__mà Bamana__ɲɛ́ 1
Kono__mà Dan_Gweetaa__gɔ̏ 1
Kono__mà Dan_Gweetaa__tȁ 1
Kono__mà Dan_Gweetaa__zɯ̏ 1
Kono__mà Guro__leè 1
Kono__mà Guro__lɛ 1
Kono__mà Guro__và 1
Kono__mà Guro__zì 1
Kono__mà Kpelle__hù 1
Kono__mà Looma__bɛ̀ 1
Kono__mà Looma__gà 1
Kono__mà Mano__bà 1
Kono__mà Mano__lɛ̀ɛ̄ 1
Kono__mà Mano__píé 1
Kono__mà Mano__ŋwɛ́ŋ̀ 1
Kono__mɛ̌i Bamana__lá 1
Kono__mɛ̌i Guro__lɛ 1
Kono__mɛ̌i Kpelle__ɓà 1
Kono__mɛ̌i Mano__là 1
Kono__pòò Bamana__kɔ́ 1
Kono__pòò Guro__zuo 1
Kono__pòò Kpelle__púlû 1
Kono__pòò Mano__píé 1
Kono__pɔ̀ Bamana__fɛ̀ 1
Kono__pɔ̀ Dan_Gweetaa__pi̋ɤ 1
Kono__pɔ̀ Guro__và 1
Kono__pɔ̀ Kpelle__pɔ́ 1
Kono__pɔ̀ Mano__mɔ̀ 1
Kono__pɔ̀ Mano__píé 1
Kono__yêi Dan_Gweetaa__gɔ̏ 1
Kono__yêi Guro__leè 1
Kpelle__à Dan_Gweetaa__bhȁ 1
Kpelle__à Looma__tá 1
Kpelle__à Mano__mɔ̀ 1
Kpelle__hù Dan_Gweetaa__bhàa 1
Kpelle__hù Kono__mà 1
Kpelle__hù Looma__bù 1
Kpelle__hù Looma__mà 1
Kpelle__hù Mano__bà 1
Kpelle__hù Mano__ká 1
Kpelle__hù Mano__yí 1
Kpelle__lá Kono__lá 1
Kpelle__lá Looma__yà 1
Kpelle__lá Mano__là 1
Kpelle__púlû Bamana__kàn 1
Kpelle__púlû Bamana__lá 1
Kpelle__púlû Bamana__nɔ̀fɛ̀ 1
Kpelle__púlû Dan_Gweetaa__kèŋ̏ 1
Kpelle__púlû Kono__pòò 1
Kpelle__púlû Looma__pòlù 1
Kpelle__pɔ́ Bamana__fɛ̀ 1
Kpelle__pɔ́ Dan_Gweetaa__pi̋ɤ 1
Kpelle__pɔ́ Kono__pɔ̀ 1
Kpelle__pɔ́ Looma__mà 1
Kpelle__yêi Dan_Gweetaa__gɔ̏ 1
Kpelle__yêi Guro__leè 1
Kpelle__ɓà Bamana__fɛ̀ 1
Kpelle__ɓà Bamana__kàn 1
Kpelle__ɓà Bamana__kɔ́ 1
Kpelle__ɓà Bamana__mà 1
Kpelle__ɓà Bamana__nɔ̀fɛ̀ 1
Kpelle__ɓà Bamana__ɲɛ́ 1
Kpelle__ɓà Dan_Gweetaa__gɔ̏ 1
Kpelle__ɓà Dan_Gweetaa__tȁ 1
Kpelle__ɓà Guro__leè 1
Kpelle__ɓà Kono__mɛ̌i 1
Kpelle__ɓà Looma__bɛ̀ 1
Kpelle__ɓà Mano__bà 1
Kpelle__ɓà Mano__kɛ̀lɛ̀ 1
Kpelle__ɓà Mano__là 1
Kpelle__ɓà Mano__lɛ̀ɛ̄ 1
Kpelle__ɓà Mano__píé 1
Kpelle__ɓà Mano__ŋwɛ́ŋ̀ 1
Looma__bà Bamana__fɛ̀ 1
Looma__bà Bamana__ɲɛ́ 1
Looma__bà Dan_Gweetaa__bhȁ 1
Looma__bà Dan_Gweetaa__gɔ̏ 1
Looma__bà Dan_Gweetaa__ká 1
Looma__bà Guro__leè 1
Looma__bà Guro__ta 1
Looma__bà Guro__và 1
Looma__bà Guro__ɓa̰ 1
Looma__bà Mano__ká 1
Looma__bà Mano__lɛ̀ɛ̄ 1
Looma__bù Bamana__lá 1
Looma__bù Dan_Gweetaa__bhàa 1
Looma__bù Kono__hù 1
Looma__bù Kpelle__hù 1
Looma__bù Mano__bà 1
Looma__bɛ̀ Bamana__nɔ̀fɛ̀ 1
Looma__bɛ̀ Dan_Gweetaa__bhȁ 1
Looma__bɛ̀ Guro__lɛ 1
Looma__bɛ̀ Kono__mà 1
Looma__bɛ̀ Kpelle__ɓà 1
Looma__bɛ̀ Mano__ká 1
Looma__bɛ̀ Mano__mɔ̀ 1
Looma__gà Bamana__lá 1
Looma__gà Bamana__yé 1
Looma__gà Dan_Gweetaa__bhȁ 1
Looma__gà Dan_Gweetaa__ká 1
Looma__gà Guro__ya̰ 1
Looma__gà Kono__mà 1
Looma__gà Mano__bà 1
Looma__kómá Guro__ɓa̰ 1
Looma__mà Bamana__kɔ́ 1
Looma__mà Dan_Gweetaa__bhȁ 1
Looma__mà Dan_Gweetaa__tȁ 1
Looma__mà Dan_Gweetaa__zɯ̏ 1
Looma__mà Guro__jì 1
Looma__mà Guro__zì 1
Looma__mà Kono__hù 1
Looma__mà Kpelle__hù 1
Looma__mà Kpelle__pɔ́ 1
Looma__mà Mano__ká 1
Looma__mà Mano__kɛ̀lɛ̀ 1
Looma__mà Mano__píé 1
Looma__mà Mano__yí 1
Looma__mà Mano__ŋwɛ́ŋ̀ 1
Looma__pòlù Bamana__kàn 1
Looma__pòlù Bamana__kɔ́ 1
Looma__pòlù Bamana__lá 1
Looma__pòlù Bamana__nɔ̀fɛ̀ 1
Looma__pòlù Dan_Gweetaa__kèŋ̏ 1
Looma__pòlù Guro__zuo 1
Looma__pòlù Kpelle__púlû 1
Looma__sù Dan_Gweetaa__bhȁ 1
Looma__sù Dan_Gweetaa__gɯ́ 1
Looma__tá Bamana__fɛ̀ 1
Looma__tá Dan_Gweetaa__ká 1
Looma__tá Guro__ya̰ 1
Looma__tá Kono__à 1
Looma__tá Kpelle__à 1
Looma__tá Mano__ká 1
Looma__tá Mano__lɛ̀ɛ̄ 1
Looma__tá Mano__píé 1
Looma__yà Bamana__bólo 1
Looma__yà Dan_Gweetaa__gɔ̏ 1
Looma__yà Guro__leè 1
Looma__yà Kono__lá 1
Looma__yà Kpelle__lá 1
Looma__yà Mano__là 1
Mano__bà Bamana__lá 1
Mano__bà Dan_Gweetaa__bhàa 1
Mano__bà Dan_Gweetaa__bhȁ 1
Mano__bà Guro__ɓa̰ 1
Mano__bà Kono__hù 1
Mano__bà Kono__mà 1
Mano__bà Kpelle__hù 1
Mano__bà Kpelle__ɓà 1
Mano__bà Looma__bù 1
Mano__bà Looma__gà 1
Mano__gɛ̀nɛ̀ Dan_Gweetaa__gɔ̏ 1
Mano__ká Bamana__ɲɛ́ 1
Mano__ká Dan_Gweetaa__gɔ̏ 1
Mano__ká Guro__leè 1
Mano__ká Guro__ɓa̰ 1
Mano__ká Kono__hù 1
Mano__ká Kpelle__hù 1
Mano__ká Looma__bà 1
Mano__ká Looma__bɛ̀ 1
Mano__ká Looma__mà 1
Mano__ká Looma__tá 1
Mano__ká Mano__kɛ̀lɛ̀ 1
Mano__ká Mano__lɛ̀ɛ̄ 1
Mano__ká Mano__yí 1
Mano__ká Mano__ŋwɛ́ŋ̀ 1
Mano__kɛ̀lɛ̀ Bamana__bólo 1
Mano__kɛ̀lɛ̀ Dan_Gweetaa__gɔ̏ 1
Mano__kɛ̀lɛ̀ Guro__leè 1
Mano__kɛ̀lɛ̀ Kpelle__ɓà 1
Mano__kɛ̀lɛ̀ Looma__mà 1
Mano__kɛ̀lɛ̀ Mano__ká 1
Mano__là Bamana__lá 1
Mano__là Dan_Gweetaa__bhȁ 1
Mano__là Dan_Gweetaa__tȁ 1
Mano__là Guro__lɛ 1
Mano__là Guro__ta 1
Mano__là Kono__hù 1
Mano__là Kono__lá 1
Mano__là Kono__mɛ̌i 1
Mano__là Kpelle__lá 1
Mano__là Kpelle__ɓà 1
Mano__là Looma__yà 1
Mano__lɛ̀ɛ̄ Bamana__fɛ̀ 1
Mano__lɛ̀ɛ̄ Bamana__yé 1
Mano__lɛ̀ɛ̄ Bamana__ɲɛ́ 1
Mano__lɛ̀ɛ̄ Dan_Gweetaa__dhɛ̏ 1
Mano__lɛ̀ɛ̄ Dan_Gweetaa__gɔ̏ 1
Mano__lɛ̀ɛ̄ Guro__leè 1
Mano__lɛ̀ɛ̄ Kono__mà 1
Mano__lɛ̀ɛ̄ Kpelle__ɓà 1
Mano__lɛ̀ɛ̄ Looma__bà 1
Mano__lɛ̀ɛ̄ Looma__tá 1
Mano__lɛ̀ɛ̄ Mano__ká 1
Mano__lɛ̀ɛ̄ Mano__píé 1
Mano__mɔ̀ Bamana__fɛ̀ 1
Mano__mɔ̀ Bamana__kàn 1
Mano__mɔ̀ Bamana__kɔ́ 1
Mano__mɔ̀ Bamana__mà 1
Mano__mɔ̀ Bamana__nɔ̀fɛ̀ 1
Mano__mɔ̀ Dan_Gweetaa__tȁ 1
Mano__mɔ̀ Dan_Gweetaa__zɯ̏ 1
Mano__mɔ̀ Guro__ya̰ 1
Mano__mɔ̀ Guro__zì 1
Mano__mɔ̀ Kono__à 1
Mano__mɔ̀ Kono__pɔ̀ 1
Mano__mɔ̀ Kpelle__à 1
Mano__mɔ̀ Looma__bɛ̀ 1
Mano__mɔ̀ Mano__píé 1
Mano__mɔ̀ Mano__ŋwɛ́ŋ̀ 1
Mano__píé Bamana__kɔ́ 1
Mano__píé Dan_Gweetaa__gɯ́ 1
Mano__píé Dan_Gweetaa__pi̋ɤ 1
Mano__píé Guro__zuo 1
Mano__píé Guro__ɓa̰ 1
Mano__píé Kono__mà 1
Mano__píé Kono__pòò 1
Mano__píé Kono__pɔ̀ 1
Mano__píé Kpelle__ɓà 1
Mano__píé Looma__mà 1
Mano__píé Looma__tá 1
Mano__píé Mano__lɛ̀ɛ̄ 1
Mano__píé Mano__mɔ̀ 1
Mano__yí Bamana__lá 1
Mano__yí Kono__hù 1
Mano__yí Kpelle__hù 1
Mano__yí Looma__mà 1
Mano__yí Mano__ká 1
Mano__ŋwɛ́ŋ̀ Dan_Gweetaa__gɯ́ 1
Mano__ŋwɛ́ŋ̀ Guro__và 1
Mano__ŋwɛ́ŋ̀ Guro__ɓa̰ 1
Mano__ŋwɛ́ŋ̀ Kono__mà 1
Mano__ŋwɛ́ŋ̀ Kpelle__ɓà 1
Mano__ŋwɛ́ŋ̀ Looma__mà 1
Mano__ŋwɛ́ŋ̀ Mano__ká 1
Mano__ŋwɛ́ŋ̀ Mano__mɔ̀ 1

Я попробовал сделать таблицу, но она получается огромная… Возьмем только примеры, где больше одного:

Code
# Heatmap of the adposition pairs that co-occur (same construction type within
# one stimulus) more than once.
df |>
  # Keep only observations that have an adposition, a construction type and
  # an actual equivalent.
  filter(!is.na(adposition),
         !is.na(construction_type),
         !str_detect(construction, "no equivalent")) |>
  # Tag each adposition with its language, e.g. "Mano__ká".
  mutate(adposition = str_c(language, adposition, sep = "__")) |>
  group_by(number) |>
  pairwise_count(adposition, construction_type) |>
  # Add the per-stimulus pair counts up across stimuli.
  group_by(item1, item2) |>
  summarise(n = sum(n), .groups = "drop") |>
  # Pairs seen only once are dropped to keep the plot readable.
  filter(n > 1) |>
  # Both axes are reordered by the pair counts (fct_reorder, descending).
  mutate(across(c(item1, item2), ~ fct_reorder(.x, n, .desc = TRUE))) |>
  ggplot(aes(item1, item2)) +
  geom_tile(aes(fill = n), colour = "white") +
  geom_text(aes(label = n), colour = "white") +
  scale_fill_gradient(low = "tomato", high = "darkred") +
  coord_fixed() +
  labs(x = "", y = "") +
  theme(legend.position = "bottom",
        axis.text.x = element_text(angle = 90, vjust = 0, hjust = 1))

Вот так я понял Машину идею про то, как посмотреть соответствия между языками, берите лупу:

Code
# For each adposition of language 1 (panel rows), show which adpositions of
# language 2 (panel columns) it co-occurs with, as shares of its co-occurrences
# with that language. Only pairs seen more than once are used.
df |>
  # Keep only observations that have an adposition, a construction type and
  # an actual equivalent.
  filter(!is.na(adposition),
         !is.na(construction_type),
         !str_detect(construction, "no equivalent")) |>
  # Tag each adposition with its language, e.g. "Mano__ká".
  mutate(adposition = str_c(language, adposition, sep = "__")) |>
  group_by(number) |>
  pairwise_count(adposition, construction_type) |>
  # Add the per-stimulus pair counts up across stimuli.
  group_by(item1, item2) |>
  summarise(n = sum(n), .groups = "drop") |>
  filter(n > 1) |>
  # Split the "Language__adposition" tags back into two columns each.
  separate(item1, into = c("language1", "adposition1"), sep = "__") |>
  separate(item2, into = c("language2", "adposition2"), sep = "__") |>
  # Share of every language-2 adposition among the counterparts of one
  # language-1 adposition.
  group_by(language1, language2, adposition1) |>
  reframe(ratio = n / sum(n),
          n = n,
          adposition2 = adposition2) |>
  distinct() |>
  # "Dan_Gweetaa" -> "Dan Gweetaa"; the factor levels fix the panel order.
  mutate(across(c(language1, language2),
                ~ factor(str_replace(.x, "_", " "),
                         levels = c("Guro", "Dan Gweetaa", "Mano", "Kpelle", "Kono", "Looma", "Bamana")))) |>
  ggplot(aes(ratio, adposition1, label = str_c(adposition2, "\n",
                                               round(ratio*100), "% (",
                                               n, ")"))) +
  geom_col(color = "white") +
  # Labels are centred inside the stacked bar segments.
  geom_label(position = position_stack(vjust = .5), size = 2) +
  facet_grid(language1 ~ language2, scales = "free") +
  labs(y = "")

Все примеры, без фильтрации

Code
# Wide verb table; the per-language etymology columns end in "etym".
et <- readxl::read_xlsx("../GM_merged_wide_verb_MK_0927.xlsx")

# Cluster the languages by the etymological sets of their verbs (all stimuli).
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  # One row per stimulus x language ("source") x etymology cell.
  pivot_longer(names_to = "source", 
               values_to = "value", 
               ends_with("etym")) |> 
  na.omit() |>
  distinct() |> 
  # A cell may list several etymologies separated by commas, but some labels
  # contain a comma themselves ("cry, weep", ...). Those are shielded with
  # "|||", the remaining commas become ";" separators, then the shield is
  # turned back into a comma and the cell is split on ";".
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  # Splitting "a, b" leaves " b" with a leading space; trim it so that the same
  # etymology is not counted as two different ones.
  mutate(value = str_trim(value)) |> 
  # "0" is presumably the placeholder for "no etymology" - TODO confirm.
  filter(value != "0") |> 
  distinct() |> 
  # Presence/absence matrix: rows are etymologies per stimulus, columns are
  # languages.
  mutate(value2 = 1) |>  
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |> 
  select(ends_with("etym")) |> 
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |> 
  # Languages become rows so that dist() compares languages.
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  # The magrittr pipe is needed here because `.` is used inside plot().
  as.phylo()  %>%
  plot(main = "Verb etymology (all stimuli)",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Только стимулы, для которых есть все языки

Code
# Cluster the languages by the etymological sets of their verbs, restricted to
# the stimuli listed in `to_keep`.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  # One row per stimulus x language ("source") x etymology cell.
  pivot_longer(names_to = "source", 
               values_to = "value", 
               ends_with("etym")) |> 
  filter(number %in% to_keep) |> 
  distinct() |> 
  # A cell may list several etymologies separated by commas, but some labels
  # contain a comma themselves ("cry, weep", ...). Those are shielded with
  # "|||", the remaining commas become ";" separators, then the shield is
  # turned back into a comma and the cell is split on ";".
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  # Splitting "a, b" leaves " b" with a leading space; trim it so that the same
  # etymology is not counted as two different ones.
  mutate(value = str_trim(value)) |> 
  distinct() |>
  # Drops "0" cells (presumably "no etymology" - TODO confirm); rows with a
  # missing value are dropped here as well, because the comparison yields NA.
  filter(value != "0") |> 
  # Presence/absence matrix: rows are etymologies per stimulus, columns are
  # languages.
  mutate(value2 = 1) |>  
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |> 
  select(ends_with("etym")) |> 
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |> 
  # Languages become rows so that dist() compares languages.
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  # The magrittr pipe is needed here because `.` is used inside plot().
  as.phylo()  %>%
  plot(main = "Verb etymology with common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Только стимулы, для которых есть все языки, случайная этимология для каждого языка

Code
# Cluster the languages by verb etymology using only the stimuli in `to_keep`
# and one randomly drawn equivalent per stimulus and language.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  # One row per stimulus x language ("source") x etymology cell.
  pivot_longer(names_to = "source", 
               values_to = "value", 
               ends_with("etym")) |> 
  na.omit() |>
  filter(number %in% to_keep) |>
  # Random draw of a single row per stimulus and language; the result changes
  # between runs unless a seed is set before this chunk.
  group_by(number, predicate_eng, source) |> 
  slice_sample(n = 1)  |> 
  distinct() |> 
  ungroup() |> 
  # A cell may list several etymologies separated by commas, but some labels
  # contain a comma themselves ("cry, weep", ...). Those are shielded with
  # "|||", the remaining commas become ";" separators, then the shield is
  # turned back into a comma and the cell is split on ";".
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  # Splitting "a, b" leaves " b" with a leading space; trim it so that the same
  # etymology is not counted as two different ones.
  mutate(value = str_trim(value)) |> 
  distinct() |>
  # "0" is presumably the placeholder for "no etymology" - TODO confirm.
  filter(value != "0") |> 
  # Presence/absence matrix: rows are etymologies per stimulus, columns are
  # languages.
  mutate(value2 = 1) |>  
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |> 
  select(ends_with("etym")) |> 
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |> 
  # Languages become rows so that dist() compares languages.
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  # The magrittr pipe is needed here because `.` is used inside plot().
  as.phylo()  %>%
  # The original title repeated "and common stimuli" twice.
  plot(main = "Verb etymology with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# One random draw: a single randomly chosen row is kept per stimulus and
# language (slice_sample() is unseeded, so each run gives a different draw),
# then the languages are clustered on the etymologies they share.
et |>
  select(number, predicate_eng, stimuli, ends_with("etym")) |>
  pivot_longer(names_to = "source",
               values_to = "value",
               ends_with("etym")) |>
  na.omit() |>
  filter(number %in% to_keep) |>
  group_by(number, predicate_eng, source) |>
  slice_sample(n = 1) |>
  distinct() |>
  ungroup() |>
  # Commas that belong to a gloss ("able, be") are shielded with "|||"; every
  # other comma becomes a ";" separator, then the shielded commas are restored.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |>
  unnest_longer(value) |>
  # Splitting "a; b" yields " b": trim, otherwise one etymology is counted as
  # two different ones and " 0" slips past the "0" filter below.
  mutate(value = str_trim(value)) |>
  distinct() |>
  filter(value != "0") |>
  # Wide 0/1 table: one row per stimulus and etymology, one column per language.
  mutate(value2 = 1) |>
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |>
  select(ends_with("etym")) |>
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |>
  # Transpose so that languages are the rows compared by dist().
  t() |>
  dist(method = "binary") |>
  hclust() |>
  # magrittr pipe here: the `.` placeholder is needed to look up tip colours.
  as.phylo() %>%
  plot(main = "Verb etymology with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5,
       font = 2)

Code
# One random draw: a single randomly chosen row is kept per stimulus and
# language (slice_sample() is unseeded, so each run gives a different draw),
# then the languages are clustered on the etymologies they share.
et |>
  select(number, predicate_eng, stimuli, ends_with("etym")) |>
  pivot_longer(names_to = "source",
               values_to = "value",
               ends_with("etym")) |>
  na.omit() |>
  filter(number %in% to_keep) |>
  group_by(number, predicate_eng, source) |>
  slice_sample(n = 1) |>
  distinct() |>
  ungroup() |>
  # Commas that belong to a gloss ("able, be") are shielded with "|||"; every
  # other comma becomes a ";" separator, then the shielded commas are restored.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |>
  unnest_longer(value) |>
  # Splitting "a; b" yields " b": trim, otherwise one etymology is counted as
  # two different ones and " 0" slips past the "0" filter below.
  mutate(value = str_trim(value)) |>
  distinct() |>
  filter(value != "0") |>
  # Wide 0/1 table: one row per stimulus and etymology, one column per language.
  mutate(value2 = 1) |>
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |>
  select(ends_with("etym")) |>
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |>
  # Transpose so that languages are the rows compared by dist().
  t() |>
  dist(method = "binary") |>
  hclust() |>
  # magrittr pipe here: the `.` placeholder is needed to look up tip colours.
  as.phylo() %>%
  plot(main = "Verb etymology with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5,
       font = 2)

Code
# One random draw: a single randomly chosen row is kept per stimulus and
# language (slice_sample() is unseeded, so each run gives a different draw),
# then the languages are clustered on the etymologies they share.
et |>
  select(number, predicate_eng, stimuli, ends_with("etym")) |>
  pivot_longer(names_to = "source",
               values_to = "value",
               ends_with("etym")) |>
  na.omit() |>
  filter(number %in% to_keep) |>
  group_by(number, predicate_eng, source) |>
  slice_sample(n = 1) |>
  distinct() |>
  ungroup() |>
  # Commas that belong to a gloss ("able, be") are shielded with "|||"; every
  # other comma becomes a ";" separator, then the shielded commas are restored.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |>
  unnest_longer(value) |>
  # Splitting "a; b" yields " b": trim, otherwise one etymology is counted as
  # two different ones and " 0" slips past the "0" filter below.
  mutate(value = str_trim(value)) |>
  distinct() |>
  filter(value != "0") |>
  # Wide 0/1 table: one row per stimulus and etymology, one column per language.
  mutate(value2 = 1) |>
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |>
  select(ends_with("etym")) |>
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |>
  # Transpose so that languages are the rows compared by dist().
  t() |>
  dist(method = "binary") |>
  hclust() |>
  # magrittr pipe here: the `.` placeholder is needed to look up tip colours.
  as.phylo() %>%
  plot(main = "Verb etymology with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5,
       font = 2)

Code
# One random draw: a single randomly chosen row is kept per stimulus and
# language (slice_sample() is unseeded, so each run gives a different draw),
# then the languages are clustered on the etymologies they share.
et |>
  select(number, predicate_eng, stimuli, ends_with("etym")) |>
  pivot_longer(names_to = "source",
               values_to = "value",
               ends_with("etym")) |>
  na.omit() |>
  filter(number %in% to_keep) |>
  group_by(number, predicate_eng, source) |>
  slice_sample(n = 1) |>
  distinct() |>
  ungroup() |>
  # Commas that belong to a gloss ("able, be") are shielded with "|||"; every
  # other comma becomes a ";" separator, then the shielded commas are restored.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |>
  unnest_longer(value) |>
  # Splitting "a; b" yields " b": trim, otherwise one etymology is counted as
  # two different ones and " 0" slips past the "0" filter below.
  mutate(value = str_trim(value)) |>
  distinct() |>
  filter(value != "0") |>
  # Wide 0/1 table: one row per stimulus and etymology, one column per language.
  mutate(value2 = 1) |>
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |>
  select(ends_with("etym")) |>
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |>
  # Transpose so that languages are the rows compared by dist().
  t() |>
  dist(method = "binary") |>
  hclust() |>
  # magrittr pipe here: the `.` placeholder is needed to look up tip colours.
  as.phylo() %>%
  plot(main = "Verb etymology with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5,
       font = 2)

Code
# One random draw: a single randomly chosen row is kept per stimulus and
# language (slice_sample() is unseeded, so each run gives a different draw),
# then the languages are clustered on the etymologies they share.
et |>
  select(number, predicate_eng, stimuli, ends_with("etym")) |>
  pivot_longer(names_to = "source",
               values_to = "value",
               ends_with("etym")) |>
  na.omit() |>
  filter(number %in% to_keep) |>
  group_by(number, predicate_eng, source) |>
  slice_sample(n = 1) |>
  distinct() |>
  ungroup() |>
  # Commas that belong to a gloss ("able, be") are shielded with "|||"; every
  # other comma becomes a ";" separator, then the shielded commas are restored.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |>
  unnest_longer(value) |>
  # Splitting "a; b" yields " b": trim, otherwise one etymology is counted as
  # two different ones and " 0" slips past the "0" filter below.
  mutate(value = str_trim(value)) |>
  distinct() |>
  filter(value != "0") |>
  # Wide 0/1 table: one row per stimulus and etymology, one column per language.
  mutate(value2 = 1) |>
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |>
  select(ends_with("etym")) |>
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |>
  # Transpose so that languages are the rows compared by dist().
  t() |>
  dist(method = "binary") |>
  hclust() |>
  # magrittr pipe here: the `.` placeholder is needed to look up tip colours.
  as.phylo() %>%
  plot(main = "Verb etymology with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5,
       font = 2)

Code
# One random draw: a single randomly chosen row is kept per stimulus and
# language (slice_sample() is unseeded, so each run gives a different draw),
# then the languages are clustered on the etymologies they share.
et |>
  select(number, predicate_eng, stimuli, ends_with("etym")) |>
  pivot_longer(names_to = "source",
               values_to = "value",
               ends_with("etym")) |>
  na.omit() |>
  filter(number %in% to_keep) |>
  group_by(number, predicate_eng, source) |>
  slice_sample(n = 1) |>
  distinct() |>
  ungroup() |>
  # Commas that belong to a gloss ("able, be") are shielded with "|||"; every
  # other comma becomes a ";" separator, then the shielded commas are restored.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |>
  unnest_longer(value) |>
  # Splitting "a; b" yields " b": trim, otherwise one etymology is counted as
  # two different ones and " 0" slips past the "0" filter below.
  mutate(value = str_trim(value)) |>
  distinct() |>
  filter(value != "0") |>
  # Wide 0/1 table: one row per stimulus and etymology, one column per language.
  mutate(value2 = 1) |>
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |>
  select(ends_with("etym")) |>
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |>
  # Transpose so that languages are the rows compared by dist().
  t() |>
  dist(method = "binary") |>
  hclust() |>
  # magrittr pipe here: the `.` placeholder is needed to look up tip colours.
  as.phylo() %>%
  plot(main = "Verb etymology with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5,
       font = 2)

Code
# One random draw: a single randomly chosen row is kept per stimulus and
# language (slice_sample() is unseeded, so each run gives a different draw),
# then the languages are clustered on the etymologies they share.
et |>
  select(number, predicate_eng, stimuli, ends_with("etym")) |>
  pivot_longer(names_to = "source",
               values_to = "value",
               ends_with("etym")) |>
  na.omit() |>
  filter(number %in% to_keep) |>
  group_by(number, predicate_eng, source) |>
  slice_sample(n = 1) |>
  distinct() |>
  ungroup() |>
  # Commas that belong to a gloss ("able, be") are shielded with "|||"; every
  # other comma becomes a ";" separator, then the shielded commas are restored.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |>
  unnest_longer(value) |>
  # Splitting "a; b" yields " b": trim, otherwise one etymology is counted as
  # two different ones and " 0" slips past the "0" filter below.
  mutate(value = str_trim(value)) |>
  distinct() |>
  filter(value != "0") |>
  # Wide 0/1 table: one row per stimulus and etymology, one column per language.
  mutate(value2 = 1) |>
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |>
  select(ends_with("etym")) |>
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |>
  # Transpose so that languages are the rows compared by dist().
  t() |>
  dist(method = "binary") |>
  hclust() |>
  # magrittr pipe here: the `.` placeholder is needed to look up tip colours.
  as.phylo() %>%
  plot(main = "Verb etymology with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5,
       font = 2)

Code
# One random draw: a single randomly chosen row is kept per stimulus and
# language (slice_sample() is unseeded, so each run gives a different draw),
# then the languages are clustered on the etymologies they share.
et |>
  select(number, predicate_eng, stimuli, ends_with("etym")) |>
  pivot_longer(names_to = "source",
               values_to = "value",
               ends_with("etym")) |>
  na.omit() |>
  filter(number %in% to_keep) |>
  group_by(number, predicate_eng, source) |>
  slice_sample(n = 1) |>
  distinct() |>
  ungroup() |>
  # Commas that belong to a gloss ("able, be") are shielded with "|||"; every
  # other comma becomes a ";" separator, then the shielded commas are restored.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |>
  unnest_longer(value) |>
  # Splitting "a; b" yields " b": trim, otherwise one etymology is counted as
  # two different ones and " 0" slips past the "0" filter below.
  mutate(value = str_trim(value)) |>
  distinct() |>
  filter(value != "0") |>
  # Wide 0/1 table: one row per stimulus and etymology, one column per language.
  mutate(value2 = 1) |>
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |>
  select(ends_with("etym")) |>
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |>
  # Transpose so that languages are the rows compared by dist().
  t() |>
  dist(method = "binary") |>
  hclust() |>
  # magrittr pipe here: the `.` placeholder is needed to look up tip colours.
  as.phylo() %>%
  plot(main = "Verb etymology with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5,
       font = 2)

Code
# One random draw: a single randomly chosen row is kept per stimulus and
# language (slice_sample() is unseeded, so each run gives a different draw),
# then the languages are clustered on the etymologies they share.
et |>
  select(number, predicate_eng, stimuli, ends_with("etym")) |>
  pivot_longer(names_to = "source",
               values_to = "value",
               ends_with("etym")) |>
  na.omit() |>
  filter(number %in% to_keep) |>
  group_by(number, predicate_eng, source) |>
  slice_sample(n = 1) |>
  distinct() |>
  ungroup() |>
  # Commas that belong to a gloss ("able, be") are shielded with "|||"; every
  # other comma becomes a ";" separator, then the shielded commas are restored.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |>
  unnest_longer(value) |>
  # Splitting "a; b" yields " b": trim, otherwise one etymology is counted as
  # two different ones and " 0" slips past the "0" filter below.
  mutate(value = str_trim(value)) |>
  distinct() |>
  filter(value != "0") |>
  # Wide 0/1 table: one row per stimulus and etymology, one column per language.
  mutate(value2 = 1) |>
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |>
  select(ends_with("etym")) |>
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |>
  # Transpose so that languages are the rows compared by dist().
  t() |>
  dist(method = "binary") |>
  hclust() |>
  # magrittr pipe here: the `.` placeholder is needed to look up tip colours.
  as.phylo() %>%
  plot(main = "Verb etymology with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5,
       font = 2)

Code
# One random draw: a single randomly chosen row is kept per stimulus and
# language (slice_sample() is unseeded, so each run gives a different draw),
# then the languages are clustered on the etymologies they share.
et |>
  select(number, predicate_eng, stimuli, ends_with("etym")) |>
  pivot_longer(names_to = "source",
               values_to = "value",
               ends_with("etym")) |>
  na.omit() |>
  filter(number %in% to_keep) |>
  group_by(number, predicate_eng, source) |>
  slice_sample(n = 1) |>
  distinct() |>
  ungroup() |>
  # Commas that belong to a gloss ("able, be") are shielded with "|||"; every
  # other comma becomes a ";" separator, then the shielded commas are restored.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |>
  unnest_longer(value) |>
  # Splitting "a; b" yields " b": trim, otherwise one etymology is counted as
  # two different ones and " 0" slips past the "0" filter below.
  mutate(value = str_trim(value)) |>
  distinct() |>
  filter(value != "0") |>
  # Wide 0/1 table: one row per stimulus and etymology, one column per language.
  mutate(value2 = 1) |>
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |>
  select(ends_with("etym")) |>
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |>
  # Transpose so that languages are the rows compared by dist().
  t() |>
  dist(method = "binary") |>
  hclust() |>
  # magrittr pipe here: the `.` placeholder is needed to look up tip colours.
  as.phylo() %>%
  plot(main = "Verb etymology with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5,
       font = 2)

Code
# One random draw: a single randomly chosen row is kept per stimulus and
# language (slice_sample() is unseeded, so each run gives a different draw),
# then the languages are clustered on the etymologies they share.
et |>
  select(number, predicate_eng, stimuli, ends_with("etym")) |>
  pivot_longer(names_to = "source",
               values_to = "value",
               ends_with("etym")) |>
  na.omit() |>
  filter(number %in% to_keep) |>
  group_by(number, predicate_eng, source) |>
  slice_sample(n = 1) |>
  distinct() |>
  ungroup() |>
  # Commas that belong to a gloss ("able, be") are shielded with "|||"; every
  # other comma becomes a ";" separator, then the shielded commas are restored.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |>
  unnest_longer(value) |>
  # Splitting "a; b" yields " b": trim, otherwise one etymology is counted as
  # two different ones and " 0" slips past the "0" filter below.
  mutate(value = str_trim(value)) |>
  distinct() |>
  filter(value != "0") |>
  # Wide 0/1 table: one row per stimulus and etymology, one column per language.
  mutate(value2 = 1) |>
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |>
  select(ends_with("etym")) |>
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |>
  # Transpose so that languages are the rows compared by dist().
  t() |>
  dist(method = "binary") |>
  hclust() |>
  # magrittr pipe here: the `.` placeholder is needed to look up tip colours.
  as.phylo() %>%
  plot(main = "Verb etymology with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5,
       font = 2)

Code
# One random draw: a single randomly chosen row is kept per stimulus and
# language (slice_sample() is unseeded, so each run gives a different draw),
# then the languages are clustered on the etymologies they share.
et |>
  select(number, predicate_eng, stimuli, ends_with("etym")) |>
  pivot_longer(names_to = "source",
               values_to = "value",
               ends_with("etym")) |>
  na.omit() |>
  filter(number %in% to_keep) |>
  group_by(number, predicate_eng, source) |>
  slice_sample(n = 1) |>
  distinct() |>
  ungroup() |>
  # Commas that belong to a gloss ("able, be") are shielded with "|||"; every
  # other comma becomes a ";" separator, then the shielded commas are restored.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |>
  unnest_longer(value) |>
  # Splitting "a; b" yields " b": trim, otherwise one etymology is counted as
  # two different ones and " 0" slips past the "0" filter below.
  mutate(value = str_trim(value)) |>
  distinct() |>
  filter(value != "0") |>
  # Wide 0/1 table: one row per stimulus and etymology, one column per language.
  mutate(value2 = 1) |>
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |>
  select(ends_with("etym")) |>
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |>
  # Transpose so that languages are the rows compared by dist().
  t() |>
  dist(method = "binary") |>
  hclust() |>
  # magrittr pipe here: the `.` placeholder is needed to look up tip colours.
  as.phylo() %>%
  plot(main = "Verb etymology with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5,
       font = 2)

Code
# One random draw: a single randomly chosen row is kept per stimulus and
# language (slice_sample() is unseeded, so each run gives a different draw),
# then the languages are clustered on the etymologies they share.
et |>
  select(number, predicate_eng, stimuli, ends_with("etym")) |>
  pivot_longer(names_to = "source",
               values_to = "value",
               ends_with("etym")) |>
  na.omit() |>
  filter(number %in% to_keep) |>
  group_by(number, predicate_eng, source) |>
  slice_sample(n = 1) |>
  distinct() |>
  ungroup() |>
  # Commas that belong to a gloss ("able, be") are shielded with "|||"; every
  # other comma becomes a ";" separator, then the shielded commas are restored.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |>
  unnest_longer(value) |>
  # Splitting "a; b" yields " b": trim, otherwise one etymology is counted as
  # two different ones and " 0" slips past the "0" filter below.
  mutate(value = str_trim(value)) |>
  distinct() |>
  filter(value != "0") |>
  # Wide 0/1 table: one row per stimulus and etymology, one column per language.
  mutate(value2 = 1) |>
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |>
  select(ends_with("etym")) |>
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |>
  # Transpose so that languages are the rows compared by dist().
  t() |>
  dist(method = "binary") |>
  hclust() |>
  # magrittr pipe here: the `.` placeholder is needed to look up tip colours.
  as.phylo() %>%
  plot(main = "Verb etymology with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5,
       font = 2)

Только стимулы, для которых есть заполненная этимология

Если посмотреть только на те случаи, где заполнена этимология, то останется 21 пример.

Code
# Cluster the languages on shared verb etymologies, keeping only rows of `et`
# in which every selected column is filled (na.omit() runs before reshaping)
# and only the stimuli listed in `to_keep`.
et |>
  select(number, predicate_eng, stimuli, ends_with("etym")) |>
  na.omit() |>
  pivot_longer(names_to = "source",
               values_to = "value",
               ends_with("etym")) |>
  filter(number %in% to_keep) |>
  distinct() |>
  # Commas that belong to a gloss ("able, be") are shielded with "|||"; every
  # other comma becomes a ";" separator, then the shielded commas are restored.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |>
  unnest_longer(value) |>
  # Splitting "a; b" yields " b": trim, otherwise one etymology is counted as
  # two different ones and " 0" slips past the "0" filter below.
  mutate(value = str_trim(value)) |>
  distinct() |>
  filter(value != "0") |>
  # Wide 0/1 table: one row per stimulus and etymology, one column per language.
  mutate(value2 = 1) |>
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |>
  select(ends_with("etym")) |>
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |>
  # Transpose so that languages are the rows compared by dist().
  t() |>
  dist(method = "binary") |>
  hclust() |>
  # magrittr pipe here: the `.` placeholder is needed to look up tip colours.
  as.phylo() %>%
  # NOTE(review): same title as the plot without the na.omit() restriction;
  # consider a more specific title.
  plot(main = "Verb etymology with common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5,
       font = 2)

Все этимологии:

Code
# Load the merged verb table and build `etym`: the binary distance between
# languages over all stimuli, based on which etymologies each language has.
et <- readxl::read_xlsx("../GM_merged_wide_verb_MK_0927.xlsx")
et |>
  select(number, predicate_eng, stimuli, ends_with("etym")) |>
  pivot_longer(names_to = "source",
               values_to = "value",
               ends_with("etym")) |>
  na.omit() |>
  distinct() |>
  # Commas that belong to a gloss ("able, be") are shielded with "|||"; every
  # other comma becomes a ";" separator, then the shielded commas are restored.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |>
  unnest_longer(value) |>
  # Splitting "a; b" yields " b": trim, otherwise one etymology is counted as
  # two different ones.
  mutate(value = str_trim(value)) |>
  # The "0" filter runs after the split: filtering whole cells beforehand
  # let "0" tokens inside multi-value cells through as a shared "etymology".
  filter(value != "0") |>
  distinct() |>
  # Wide 0/1 table: one row per stimulus and etymology, one column per language.
  mutate(value2 = 1) |>
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |>
  select(ends_with("etym")) |>
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |>
  # Transpose so that languages are the rows compared by dist().
  t() |>
  dist(method = "binary") ->
  etym

# Dendrogram of the distances; the magrittr pipe supplies the `.` placeholder
# used to look up tip colours.
etym |>
  hclust() |>
  as.phylo() %>%
  plot(main = "Verb etymology (all stimuli)",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5,
       font = 2)

Code
# Plot the neighborNet() network computed from the `etym` distances, then
# add the title on top of the finished plot.
plot(neighborNet(etym))
title(main = "Verb etymology (all stimuli)")

Кластеризация со случайным эквивалентом. Я запустил её более 10 раз – в структуре дерева ничего не поменялось, менялась только длина ветвей.

Code
# Build `etym_random`: like the full etymology distance, but with a single
# randomly chosen etymology per stimulus and language (unseeded, so every run
# gives a different draw).
et |>
  select(number, predicate_eng, stimuli, ends_with("etym")) |>
  pivot_longer(names_to = "source",
               values_to = "value",
               ends_with("etym")) |>
  na.omit() |>
  distinct() |>
  # Commas that belong to a gloss ("able, be") are shielded with "|||"; every
  # other comma becomes a ";" separator, then the shielded commas are restored.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |>
  unnest_longer(value) |>
  # Splitting "a; b" yields " b": trim, otherwise one etymology is counted as
  # two different ones.
  mutate(value = str_trim(value)) |>
  # The "0" filter runs after the split: filtering whole cells beforehand
  # let "0" tokens inside multi-value cells through, so they could be sampled.
  filter(value != "0") |>
  distinct() |>
  # slice_sample() replaces the superseded sample_n(); one row per group.
  group_by(number, source) |>
  slice_sample(n = 1) |>
  ungroup() |>
  # Wide 0/1 table: one row per stimulus and etymology, one column per language.
  mutate(value2 = 1) |>
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |>
  select(ends_with("etym")) |>
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Kono = Kono_etym,
         Mano = Mano_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |>
  # Transpose so that languages are the rows compared by dist().
  t() |>
  dist(method = "binary") ->
  etym_random

# Dendrogram of the distances; the magrittr pipe supplies the `.` placeholder
# used to look up tip colours.
# NOTE(review): the title says "common stimuli", but this pipeline has no
# `number %in% to_keep` filter — confirm whether the title or the filter is
# what was intended.
etym_random |>
  hclust() |>
  as.phylo() %>%
  plot(main = "Verb etymology with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5,
       font = 2)

Code
# Plot the neighborNet() network computed from the `etym_random` distances,
# then add the title on top of the finished plot.
plot(neighborNet(etym_random))
title(main = "Verb etymology with random equivalents and common stimuli")

Это сделано по следующей таблице:

Code
# Print the table behind the clustering: one row per stimulus and etymology,
# one column per language, each cell holding the etymology label or "".
et |>
  select(number, predicate_eng, ends_with("etym")) |>
  pivot_longer(names_to = "source",
               values_to = "value",
               ends_with("etym")) |>
  na.omit() |>
  distinct() |>
  # Commas that belong to a gloss ("able, be") are shielded with "|||"; every
  # other comma becomes a ";" separator, then the shielded commas are restored.
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |>
  unnest_longer(value) |>
  # Splitting "a; b" yields " b": without trimming, the same etymology ends up
  # on two separate rows and sorts out of order.
  mutate(value = str_trim(value)) |>
  # The "0" filter runs after the split, so "0" tokens inside multi-value
  # cells are removed as well.
  filter(value != "0") |>
  distinct() |>
  # Keep a copy of the label for the cells; `value` itself stays as row key.
  mutate(value2 = value) |>
  pivot_wider(names_from = source, values_from = value2, values_fill = "") |>
  arrange(number, predicate_eng, value) |>
  select(-value)
number predicate_eng Guro_etym Looma_etym Mano_etym Dan_etym Bamana_etym Kpelle_etym Kono_etym
1 hurt break (to)-2-wi
1 hurt do-1-ke
1 hurt go out (to)-bo
1 hurt hurt-xona hurt-xona hurt-xona
1 hurt illness-1-jankaro
1 hurt pain-1-dimi
1 hurt sufferance-4-waa
1 hurt turn-pene
2 be sick with catch-2-miita
2 be sick with do-1-ke
3 be afraid of fear-1-g’ila fear-1-g’ila
3 be afraid of fear-3-duwa fear-3-duwa
3 be afraid of fear-4-gaaxu fear-4-gaaxu
4 throw throw-1 (to)-fili throw-1 (to)-fili throw-1 (to)-fili throw-1 (to)-fili throw-1 (to)-fili
4 throw throw-2 (to)-zu throw-2 (to)-zu
5 be sufficient to catch-3-catch
5 be sufficient to go out (to)-bo
6 be like sb go out (to)-bo go out (to)-bo go out (to)-bo go out (to)-bo
6 be like sb knock down-kula knock down-kula knock down-kula
7 have trust confidence-dannaya
7 have trust go out (to)-bo
7 have trust lie down-1-d’a lie down-1-d’a lie down-1-d’a lie down-1-d’a
7 have trust send-2-bo
7 have trust stand (to)-1-d’o stand (to)-1-d’o
8 take take-2-si take-2-si take-2-si
8 take take-3-ta
8 take take-5-sige take-5-sige take-5-sige
9 see what look-2 (to)-ga
9 see what see (to)-1-ye see (to)-1-ye see (to)-1-ye
9 see what see (to)-3-ka see (to)-3-ka see (to)-3-ka
10 influence press-1 (to)-digi
10 influence put-2-kpa
11 meet go out (to)-bo
11 meet meet-ben
11 meet see (to)-1-ye see (to)-1-ye see (to)-1-ye
11 meet see (to)-3-ka
12 enter send-2-bo
12 enter throw-2 (to)-zu
12 enter enter (to)-1 enter (to)-1
12 enter enter (to)-1-so
12 enter enter (to)-2-dzon
12 enter enter (to)-3-wola enter (to)-3-wola
12 enter lie down-1-d’a
13 win able, be-1-mo
13 win beat-1-bugo
13 win do-1-ke
13 win prepare (to)-baa
13 win receive-sodon
13 win rise (to)-1-te
13 win take-2-si
14 leave take-5-sige
14 leave go away (to)-go
14 leave go out (to)-bo go out (to)-bo go out (to)-bo
14 leave knock down-kula knock down-kula knock down-kula
15 chase chase (to)-kpe chase (to)-kpe
15 chase come-3-nu
15 chase go (to)-1-di go (to)-1-di
15 chase go away (to)-go
16 bend bend-4-kula
16 bend bend-1-bidin
16 bend bend-3-kpilin bend-3-kpilin bend-3-kpilin bend-3-kpilin
16 bend bend-4-kula
17 say say-1-fo say-1-fo say-1-fo
17 say say-5-gee
17 say say-6-yee say-6-yee say-6-yee
18 hold catch-3-kun
18 hold catch-4-song
18 hold hold-mara
19 catch catch-3-kun catch-3-kun catch-3-kun
19 catch catch-4-song catch-4-song catch-4-song
19 catch stand (to)-1-do
20 to milk go out (to)-bo
20 to milk knock down-kula knock down-kula knock down-kula
20 to milk milk (to)-bidi
20 to milk take-2-si
21 reach arrive-1-k’e
21 reach arrive-4-kite arrive-4-kite
21 reach go out (to)-bo go out (to)-bo go out (to)-bo
22 touch 0
22 touch touch (to)-4-tungbong touch (to)-4-tungbong
22 touch 0
22 touch sit (to)-1-sigi
22 touch touch (to)-1-maga
22 touch touch (to)-2-pa touch (to)-2-pa
22 touch touch (to)-4-tungbong touch (to)-4-tungbong
23 fight sb do-4-goon do-4-goon do-4-goon do-4-goon do-4-goon
23 fight sb measure (to)-2-dan
23 fight sb war-1-kele
24 be friends with do-1-ke
24 be friends with lie down-1-d’a
25 think about sb come-3-nu
25 think about sb do-1-ke
25 think about sb think (to)-1-miira
25 think about sb walk-1-sigan walk-1-sigan walk-1-sigan
25 think about sb walk-3-taga
26 eat drink (to)-1-min drink (to)-1-min drink (to)-1-min
26 eat eat (to)-1-don/domu
26 eat eat (to)-2-bele eat (to)-2-bele eat (to)-2-bele
27 fry fry (to)-geran fry (to)-geran fry (to)-geran fry (to)-geran fry (to)-geran fry (to)-geran
28 wait help-2-kpong
28 wait remain-to
28 wait stand (to)-1-d’o stand (to)-1-d’o
28 wait wait-1-kono
28 wait wait-2-gben wait-2-gben wait-2-gben wait-2-gben
29 forget forget-2-nemu
29 forget forget-1-nyina forget-1-nyina
29 forget forget-2-nemu forget-2-nemu forget-2-nemu
29 forget go out (to)-bo
29 forget loose (to)-1-sama
30 depend on take-2-si take-2-si
31 call call-2-wele
31 call call-3-dhee call-3-dhee
31 call call-5-toli call-5-toli call-5-toli
31 call do-1-ke
32 meet, faire connaissance know (to)-1-d’on know (to)-1-d’on know (to)-1-d’on know (to)-1-d’on
32 meet, faire connaissance know (to)-2-kolon know (to)-2-kolon know (to)-2-kolon
33 know someone know (to)-1-d’on know (to)-1-d’on know (to)-1-d’on know (to)-1-d’on
33 know someone know (to)-2-kolon know (to)-2-kolon know (to)-2-kolon
34 play blow-1-fe blow-1-fe
34 play kill-2-je
34 play forge (to)-2-galin forge (to)-2-galin
34 play kill-2-je kill-2-je
34 play say-1-fo
34 play strike-2-ma
35 avoid go out (to)-bo
35 avoid lie down-1-d’a lie down-1-d’a
35 avoid put-2-kpa
35 avoid spread (to)-2-kpo spread (to)-2-kpo
36 fabricate 0
36 fabricate do-1-ke do-1-ke do-1-ke
36 fabricate make-kpeteng make-kpeteng
36 fabricate prepare (to)-baa
36 fabricate weave (to)-1-dan
37 mock stand (to)-1-d’o
37 mock tear (to)-pera
37 mock go out (to)-bo
37 mock knock down-kula
37 mock laugh-1-jele laugh-1-jele laugh-1-jele
37 mock stand (to)-1-d’o
39 seek spread (to)-2-kpo
39 seek search-1-nyini
39 seek search-3-gini
39 seek search-4-koli
39 seek walk-1-sigan
39 seek walk-3-taga
40 paint draw (a picture)-nyege
40 paint pass-2-gile
40 paint rise (to)-1-te
40 paint whiten-fele
40 paint write-2-been
41 bite bite (to)-kin bite (to)-kin bite (to)-kin bite (to)-kin
41 bite catch-3-kun
41 bite stand (to)-1-d’o stand (to)-1-d’o
42 be deprived fall (to)-2-dia
42 be deprived fail-5-kon
42 be deprived fail-6-fenge fail-6-fenge
42 be deprived forget-1-nyina
42 be deprived loose (to)-1-sama
42 be deprived misfortune-1-bono
43 catch chase (to)-kpe
43 catch catch-2-miita
43 catch catch-3-kun catch-3-kun catch-3-kun
43 catch catch-4-song catch-4-song catch-4-song
44 break break (to)-1-gali
44 break break (to)-1-gali break (to)-1-gali break (to)-1-gali break (to)-1-gali break (to)-1-gali break (to)-1-gali
44 break break (to)-2-wi
45 flatter do-1-ke
45 flatter rise (to)-1-te rise (to)-1-te rise (to)-1-te
46 love do-1-ke
46 love go out (to)-bo
46 love do-1-ke do-1-ke
46 love go out (to)-bo
46 love love-xani
47 wave go out (to)-bo
47 wave shake-6-yuguyugu
48 dream go out (to)-bo go out (to)-bo
48 dream kill-2-je
49 wash wash-1-poli
49 wash wash-2-zulu wash-2-zulu
49 wash wash-3-ko wash-3-ko wash-3-ko wash-3-ko
50 put on do-5-wo
50 put on enter (to)-1-so enter (to)-1-so
50 put on enter (to)-2-dzon
50 put on lie down-1-d’a
50 put on pour-2 (to)-pu
51 call smth call-3-dhee call-3-dhee
51 call smth do-1-ke
51 call smth say-5-gee
51 call smth speak-1-xo
52 punish lie down-1-d’a
52 punish lie down-3-wo
52 punish punish-nyangi
52 punish stand (to)-1-d’o
53 attack lie down-1-d’a
53 attack venom-baga
53 attack break (to)-3-golo break (to)-3-golo
53 attack catch-4-song
53 attack fall (to)-1-bele
53 attack fall (to)-3-pele
53 attack rush-3-gbidi
53 attack sit (to)-2-yaga
53 attack venom-baga
54 be filled with smth fill (to)-1-pa fill (to)-1-pa fill (to)-1-pa fill (to)-1-pa fill (to)-1-pa fill (to)-1-pa fill (to)-1-pa
55 find smth see (to)-1-ye see (to)-1-ye see (to)-1-ye
55 find smth see (to)-3-ka see (to)-3-ka see (to)-3-ka
56 lack go out (to)-bo
57 hate, detest boil-3-fili
57 hate, detest do-1-ke
57 hate, detest end-6-kpe
57 hate, detest hate-xoni
57 hate, detest take-5-sige
58 like lie down-1-d’a
58 like catch-3-kun
58 like catch-4-song
58 like do-1-ke
58 like good-2
58 like good-2-di
58 like please (to)-so
58 like tasty-nene
59 need catch-3-kun
60 surround encircle-2-kooli
60 surround encircle-1-minin
60 surround encircle-2-kooli
60 surround round-1-dhidhi round-1-dhidhi
60 surround round-3-tintan round-3-tintan
60 surround round-4-kanka
61 be left end-2-bo
61 be left leave (go away)-1-dho leave (go away)-1-dho leave (go away)-1-dho
61 be left remain-to remain-to
62 respond go out (to)-bo
62 respond round-1-dhidhi
62 respond transform-ponden
62 respond agree-3-son
62 respond answer-1-jabi
62 respond go out (to)-bo
62 respond transform-ponden transform-ponden
63 open go out (to)-bo go out (to)-bo go out (to)-bo
63 open open-1-polo
63 open open-2-yele
63 open stand (to)-1-d’o
64 be different fall (to)-4-to
64 be different go out (to)-bo
64 be different knock down-kula
64 be different take-2-si
65 fall behind end-2-bo
65 fall behind leave (go away)-1-dho leave (go away)-1-dho
65 fall behind remain-to remain-to
66 plow do-1-ke do-1-ke
66 plow farm-sene
66 plow go out (to)-bo
66 plow plant (to)-1-sing
66 plow put-2-kpa
67 smell stand (to)-1-d’o
68 traverse, cross cut-1 (to)-kan cut-1 (to)-kan
68 traverse, cross cut-3 (to)-kini
68 traverse, cross cut-4 (to)-tige
68 traverse, cross cut-8 (to)-tebe cut-8 (to)-tebe cut-8 (to)-tebe
68 traverse, cross spread (to)-2-kpo
69 sing fall (to)-4-to fall (to)-4-to fall (to)-4-to fall (to)-4-to fall (to)-4-to
69 sing lie down-1-d’a
69 sing send-2-bo
70 write mark-ponyang
70 write do-1-ke
70 write kill-2-je kill-2-je
70 write mark-ponyang mark-ponyang mark-ponyang
70 write write-safe
71 drink drink (to)-1-min drink (to)-1-min drink (to)-1-min drink (to)-1-min
71 drink drink (to)-2-kpole drink (to)-2-kpole drink (to)-2-kpole
72 melt melt-2-senge
72 melt melt-1-yeelen
72 melt melt-2-senge melt-2-senge melt-2-senge
72 melt pour-2 (to)-pu
73 approach able, be-1-mo
73 approach catch-4-song catch-4-song
73 approach go (to)-2-dho
73 approach meet-ben
74 leave go away (to)-go
74 leave go away (to)-go
74 leave go out (to)-bo
74 leave rise (to)-2-wili
74 leave take-2-si
74 leave take-5-sige take-5-sige take-5-sige
75 75) cover gather-1-dhen
75 75) cover bend-2-bugun
75 75) cover close-1 (to), shut (to)-tugu
75 75) cover close-2 (to), shut (to)-tan
75 75) cover close-3 (to), shut (to)-kporu
75 75) cover fructify-ba
75 75) cover lie down-1-d’a
75 75) cover lie down-4-kpaa lie down-4-kpaa
76 76) remember come-3-nu
76 76) remember descend-1-jigi
76 76) remember lie down-1-d’a
76 76) remember stand (to)-1-d’o
76 76) remember take-2-si
76 76) remember wake up-2-buo wake up-2-buo
76 76) remember walk-1-sigan walk-1-sigan
77 77) help go out (to)-bo
77 77) help throw-2 (to)-zu
77 77) help catch-3-kun
77 77) help go out (to)-bo
77 77) help help-1-deemaa
77 77) help help-2-kpong help-2-kpong help-2-kpong
77 77) help pass-3-tanbi
78 78) understand hear (to)-moli hear (to)-moli hear (to)-moli hear (to)-moli hear (to)-moli hear (to)-moli
78 78) understand understand-fa’amu
79 79) fall into descend-1-jigi descend-1-jigi
79 79) fall into fall (to)-4-to fall (to)-4-to fall (to)-4-to
79 79) fall into lie down-1-d’a lie down-1-d’a
80 80) cut with wound-3-gii
80 80) cut with cut-1 (to)-kan cut-1 (to)-kan
80 80) cut with cut-4 (to)-tige
80 80) cut with cut-5 (to)-sege
80 80) cut with cut-8 (to)-tebe cut-8 (to)-tebe
80 80) cut with wound-4-tugo
81 81) stick to smth fasten-nodo fasten-nodo
81 81) stick to smth stuck, get (to)-kpatang stuck, get (to)-kpatang stuck, get (to)-kpatang
82 82) lose to smb prepare (to)-baa
83 83) be happy about, se rejouir de catch-3-kun
83 83) be happy about, se rejouir de do-1-ke
83 83) be happy about, se rejouir de good-2
83 83) be happy about, se rejouir de good-2-di
83 83) be happy about, se rejouir de tasty-nene tasty-nene tasty-nene
84 84) speak with do-1-ke
84 84) speak with fall (to)-4-to
84 84) speak with fall (to)-4-to
84 84) speak with go out (to)-bo go out (to)-bo
84 84) speak with kill-2-je
84 84) speak with speak-3-we
84 84) speak with stand (to)-1-d’o
84 84) speak with talk-1-bado
85 85) give birth to bear, give birth-1-banki
85 85) give birth to save-2-dha
85 85) give birth to bear, give birth-3-yaa bear, give birth-3-yaa
85 85) give birth to bear, give birth-5
85 85) give birth to descend-1-jigi descend-1-jigi
85 85) give birth to receive-sodon
85 85) give birth to see (to)-3-ka
85 85) give birth to spread (to)-2-kpo
86 86) let fall smth fall (to)-1-bele
86 86) let fall smth fall (to)-2-dia
86 86) let fall smth fall (to)-4-to fall (to)-4-to fall (to)-4-to
86 86) let fall smth go out (to)-bo
86 86) let fall smth lie down-1-d’a
87 87) diriger, gouverner take-2-si
87 87) diriger, gouverner go out (to)-bo
87 87) diriger, gouverner say-5-gee
88 88) miss catch-3-kun
88 88) miss catch-4-song catch-4-song
89 89) follow go away (to)-go
89 89) follow pass-2-gile
89 89) follow put-1-bila put-1-bila
90 90) climb down put-1-bila
90 90) climb down descend-1-jigi descend-1-jigi descend-1-jigi
90 90) climb down descend-2-yolo descend-2-yolo
90 90) climb down knock down-kula
90 90) climb down put-1-bila
91 91) listen to smth hear (to)-moli hear (to)-moli hear (to)-moli
91 91) listen to smth remain-to
91 91) listen to smth stand (to)-1-d’o stand (to)-1-d’o stand (to)-1-d’o stand (to)-1-d’o stand (to)-1-d’o
92 92) listen to smb go out (to)-bo
92 92) listen to smb obey-kolo obey-kolo
92 92) listen to smb sit (to)-2-yaga
92 92) listen to smb spread (to)-2-kpo
93 93) hear hear (to)-moli hear (to)-moli hear (to)-moli hear (to)-moli hear (to)-moli hear (to)-moli hear (to)-moli
93 93) hear stand (to)-1-d’o
94 94) mix up with smth mix-4-nyaxami
94 94) mix up with smth mix-6-sunpu mix-6-sunpu
95 95) look at end-3-nyia
95 95) look at look-1 (to)-felen
95 95) look at look-2 (to)-ga look-2 (to)-ga
95 95) look at look-4 (to)-kpele
95 95) look at search-3-gini
95 95) look at see (to)-3-ka see (to)-3-ka
96 96) take off go out (to)-bo go out (to)-bo go out (to)-bo go out (to)-bo
96 96) take off knock down-kula knock down-kula
97 97) appear in a dream dream-1-k’uyi
97 97) appear in a dream go out (to)-bo go out (to)-bo go out (to)-bo
97 97) appear in a dream kill-2-je
98 98) agree lie down-1-d’a
98 98) agree 0
98 98) agree agree-3-son
98 98) agree lie down-1-d’a
98 98) agree speak-3-we
98 98) agree stand (to)-1-d’o
98 98) agree tasty-nene
99 99) dispute do-1-ke
99 99) dispute do-1-ke do-1-ke
99 99) dispute noise-1-sonxo
99 99) dispute stand (to)-1-d’o
99 99) dispute war-1-kele
101 101) shoot at destroy (to)-1-te
101 101) shoot at lie down-1-d’a lie down-1-d’a
101 101) shoot at shoot, sting (to)-bon
101 101) shoot at stand (to)-1-to stand (to)-1-to
101 101) shoot at throw-1 (to)-fili
102 102) pour (dry product) do-1-ke do-1-ke
102 102) pour (dry product) go out (to)-bo
102 102) pour (dry product) pour-2 (to)-pu pour-2 (to)-pu
103 103) lose loose (to)-1-sama
103 103) lose drop-1 (to)-budun
103 103) lose fall (to)-2-dia
103 103) lose fall (to)-4-to
103 103) lose forget-1-nyina
103 103) lose loose (to)-2-leenu loose (to)-2-leenu
103 103) lose throw-1 (to)-fili
104 104) drown remain-to
104 104) drown die-1-kha
104 104) drown disappear-tunu
104 104) drown enter (to)-1-so
104 104) drown leave (go away)-1-dho leave (go away)-1-dho
104 104) drown remain-to
105 105) kill kill-1-faxa kill-1-faxa kill-1-faxa kill-1-faxa
105 105) kill kill-2-je kill-2-je kill-2-je
106 106) hit 0hit-kele 0hit-kele
106 106) hit beat-1-bugo
106 106) hit beat-2-dokpe
106 106) hit strike-2-ma strike-2-ma
107 107) kiss stand (to)-1-d’o
108 108) read read-2-lonong
108 108) read go out (to)-bo
108 108) read read-1-kara read-1-kara
108 108) read say-1-fo say-1-fo
108 108) read say-5-gee
109 109) move smth 0move-tumu 0move-tumu
109 109) move smth go out (to)-bo
109 109) move smth move (to)-3-lamaga
109 109) move smth noise-2-vin
109 109) move smth shake-1-miimii
110 110) respect do-1-ke
110 110) respect give (to)-3-fe
110 110) respect go out (to)-bo
110 110) respect heavy-1-bi heavy-1-bi
110 110) respect respect-3-bonya
110 110) respect sit (to)-2-yaga
111 111) disdain spoil-1-sile
111 111) disdain do-1-ke
111 111) disdain go out (to)-bo go out (to)-bo
111 111) disdain lie down-1-d’a
111 111) disdain loathe-nyigin
111 111) disdain see (to)-1-ye
112 112) be happy about fill (to)-1-pa
112 112) be happy about good-2-di
112 112) be happy about satisfy-wasa
112 112) be happy about tasty-nene tasty-nene tasty-nene
113 113) fall in love with lie down-1-d’a lie down-1-d’a
113 113) fall in love with love-xani
113 113) fall in love with passion-jarabi
113 113) fall in love with stand (to)-1-to stand (to)-1-to
114 114) trust in sb lie down-1-d’a lie down-1-d’a
115 115) have pity of catch-4-song
115 115) have pity of do-1-ke do-1-ke
115 115) have pity of pity-2-hina
116 116) be envious of cut-1 (to)-kan
116 116) be envious of do-1-ke
116 116) be envious of do-5-wo
116 116) be envious of fill (to)-1-pa
116 116) be envious of hate-xoni
116 116) be envious of swell-3-fuunu
117 117) be angry with bind-gidi
117 117) be angry with tie-1
117 117) be angry with bend-4-kula
117 117) be angry with bitter-2-xunan
117 117) be angry with boil-3-fili
117 117) be angry with catch-4-song
117 117) be angry with pain-1-dimi
117 117) be angry with pain-2-soli
117 117) be angry with take-2-si
118 118) be surprised about do-1-ke
118 118) be surprised about knock down-kula
118 118) be surprised about throw-1 (to)-fili
118 118) be surprised about throw-2 (to)-zu
119 119) love smth do-1-ke
119 119) love smth good-2-di
119 119) love smth please (to)-so
119 119) love smth tasty-nene
120 120) enjoy/ take pleasure in go out (to)-bo
120 120) enjoy/ take pleasure in knock down-kula
120 120) enjoy/ take pleasure in see (to)-1-ye
120 120) enjoy/ take pleasure in take-2-si
120 120) enjoy/ take pleasure in tasty-nene
121 121) want do-1-ke
122 122) be angry with = 117? boil-3-fili
122 122) be angry with = 117? catch-4-song
123 123) take offense by sb do-1-ke
123 123) take offense by sb go out (to)-bo
123 123) take offense by sb lie down-1-d’a
124 124) make sad sb chase (to)-kpe
124 124) make sad sb cut-8 (to)-tebe
124 124) make sad sb do-1-ke
124 124) make sad sb enter (to)-2-dzon
124 124) make sad sb kill-1-faxa
124 124) make sad sb knock down-kula
124 124) make sad sb wound-3-gii
125 125) be surprised by end-1-ban
125 125) be surprised by throw-1 (to)-fili
126 126) despise sb kill-2-je
126 126) despise sb better-fisa
126 126) despise sb do-1-ke
126 126) despise sb go out (to)-bo go out (to)-bo
126 126) despise sb stand (to)-1-d’o
127 127) be sad because of sb fill (to)-1-pa
127 127) be sad because of sb break (to)-1-gali
127 127) be sad because of sb cry, weep (to)-1-kasi
127 127) be sad because of sb cut-8 (to)-tebe
127 127) be sad because of sb stand (to)-1-d’o
127 127) be sad because of sb tasty-nene
128 128) be annoyed by sb boil-3-fili
128 128) be annoyed by sb catch-4-song
128 128) be annoyed by sb pain-1-dimi
129 129) sympathise to sb help-2-kpong
129 129) sympathise to sb go out (to)-bo go out (to)-bo
130 130) be embarrassed by sb cold-3-deli
130 130) be embarrassed by sb embarrass-2-kpala embarrass-2-kpala embarrass-2-kpala
130 130) be embarrassed by sb hinder-1-degun

Все примеры, без фильтрации

Code
# Dendrogram of adposition etymologies over all stimuli (no filtering).
# The wide etymology table is reshaped into a 0/1 matrix with one row per
# stimulus-etymology pair and one column per language; the languages are then
# clustered on binary distance.
et <- readxl::read_xlsx("../GM_merged_wide_adpositions_MK_0926.xlsx")
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(names_to = "source", 
               values_to = "value", 
               ends_with("etym")) |> 
  na.omit() |> 
  distinct() |> 
  # a cell may list several etymologies separated by ";"
  mutate(value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  # trim the blanks left around ";" so that identical etymologies compare
  # equal; the "0" placeholder is dropped only after the split, so it is also
  # removed when it occurs inside a list
  mutate(value = str_squish(value)) |> 
  filter(value != "0") |> 
  distinct() |> 
  mutate(value2 = 1,
         source = str_replace(source, " ", "_")) |> 
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |> 
  select(ends_with("etym"))  |> 
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |> 
  # transpose so that languages are the rows compared by dist()
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  # magrittr pipe is needed here because of the `.` placeholder below
  as.phylo()  %>%
  plot(main = "Adposition etymology (all stimuli)",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Только стимулы, для которых есть все языки

Code
# Dendrogram of adposition etymologies restricted to the stimuli listed in
# `to_keep` (defined earlier in the document). No random sampling is done
# here: every etymology of every kept stimulus enters the 0/1 matrix.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(names_to = "source", 
               values_to = "value", 
               ends_with("etym")) |> 
  na.omit() |> 
  filter(number %in% to_keep) |> 
  distinct() |> 
  # a cell may list several etymologies separated by ";"
  mutate(value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  # trim the blanks left around ";" before comparing, so that identical
  # etymologies match and " 0" is recognised as the "0" placeholder
  mutate(value = str_squish(value)) |> 
  filter(value != "0") |> 
  distinct() |> 
  mutate(value2 = 1,
         source = str_replace(source, " ", "_")) |> 
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |> 
  select(ends_with("etym"))  |> 
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |> 
  # transpose so that languages are the rows compared by dist()
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  # magrittr pipe is needed here because of the `.` placeholder below
  as.phylo()  %>%
  # title no longer mentions "random equivalents": this chunk does not sample
  plot(main = "Adposition etymology (common stimuli)",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Только стимулы, для которых есть все языки, случайная этимология для каждого языка

Code
# Dendrogram of adposition etymologies restricted to the stimuli in `to_keep`,
# keeping one randomly drawn etymology row per stimulus. The draw is not
# seeded, so the tree can differ from run to run.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(cols = ends_with("etym"),
               names_to = "source",
               values_to = "value") |> 
  na.omit() |> 
  filter(number %in% to_keep) |> 
  distinct() |> 
  # split ";"-separated cells into one row per etymology and tidy the blanks
  mutate(value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  mutate(value = str_squish(value)) |> 
  distinct() |> 
  # "0" is dropped here rather than counted as an etymology
  filter(value != "0") |> 
  mutate(source = str_replace(source, " ", "_"),
         present = 1) |> 
  # one 0/1 column per language, one row per stimulus-etymology pair
  pivot_wider(names_from = source, values_from = present, values_fill = 0) |> 
  # NOTE(review): the sample is taken per stimulus (one etymology row shared by
  # all languages), not separately for each language -- confirm this is intended
  group_by(number) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(ends_with("etym")) |> 
  rename(Bamana = Bamana_etym,
         Dan_Gweetaa = Dan_etym,
         Guro = Guro_etym,
         Kono = Kono_etym,
         Kpelle = Kpelle_etym,
         Looma = Looma_etym,
         Mano = Mano_etym) |> 
  # transpose so that languages are the rows compared by dist()
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo() |> 
  (\(tree) plot(tree,
                main = "Adposition etymology with random equivalents and common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# Dendrogram of adposition etymologies restricted to the stimuli in `to_keep`,
# keeping one randomly drawn etymology row per stimulus. The draw is not
# seeded, so the tree can differ from run to run.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(cols = ends_with("etym"),
               names_to = "source",
               values_to = "value") |> 
  na.omit() |> 
  filter(number %in% to_keep) |> 
  distinct() |> 
  # split ";"-separated cells into one row per etymology and tidy the blanks
  mutate(value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  mutate(value = str_squish(value)) |> 
  distinct() |> 
  # "0" is dropped here rather than counted as an etymology
  filter(value != "0") |> 
  mutate(source = str_replace(source, " ", "_"),
         present = 1) |> 
  # one 0/1 column per language, one row per stimulus-etymology pair
  pivot_wider(names_from = source, values_from = present, values_fill = 0) |> 
  # NOTE(review): the sample is taken per stimulus (one etymology row shared by
  # all languages), not separately for each language -- confirm this is intended
  group_by(number) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(ends_with("etym")) |> 
  rename(Bamana = Bamana_etym,
         Dan_Gweetaa = Dan_etym,
         Guro = Guro_etym,
         Kono = Kono_etym,
         Kpelle = Kpelle_etym,
         Looma = Looma_etym,
         Mano = Mano_etym) |> 
  # transpose so that languages are the rows compared by dist()
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo() |> 
  (\(tree) plot(tree,
                main = "Adposition etymology with random equivalents and common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# Dendrogram of adposition etymologies restricted to the stimuli in `to_keep`,
# keeping one randomly drawn etymology row per stimulus. The draw is not
# seeded, so the tree can differ from run to run.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(cols = ends_with("etym"),
               names_to = "source",
               values_to = "value") |> 
  na.omit() |> 
  filter(number %in% to_keep) |> 
  distinct() |> 
  # split ";"-separated cells into one row per etymology and tidy the blanks
  mutate(value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  mutate(value = str_squish(value)) |> 
  distinct() |> 
  # "0" is dropped here rather than counted as an etymology
  filter(value != "0") |> 
  mutate(source = str_replace(source, " ", "_"),
         present = 1) |> 
  # one 0/1 column per language, one row per stimulus-etymology pair
  pivot_wider(names_from = source, values_from = present, values_fill = 0) |> 
  # NOTE(review): the sample is taken per stimulus (one etymology row shared by
  # all languages), not separately for each language -- confirm this is intended
  group_by(number) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(ends_with("etym")) |> 
  rename(Bamana = Bamana_etym,
         Dan_Gweetaa = Dan_etym,
         Guro = Guro_etym,
         Kono = Kono_etym,
         Kpelle = Kpelle_etym,
         Looma = Looma_etym,
         Mano = Mano_etym) |> 
  # transpose so that languages are the rows compared by dist()
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo() |> 
  (\(tree) plot(tree,
                main = "Adposition etymology with random equivalents and common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# Dendrogram of adposition etymologies restricted to the stimuli in `to_keep`,
# keeping one randomly drawn etymology row per stimulus. The draw is not
# seeded, so the tree can differ from run to run.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(cols = ends_with("etym"),
               names_to = "source",
               values_to = "value") |> 
  na.omit() |> 
  filter(number %in% to_keep) |> 
  distinct() |> 
  # split ";"-separated cells into one row per etymology and tidy the blanks
  mutate(value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  mutate(value = str_squish(value)) |> 
  distinct() |> 
  # "0" is dropped here rather than counted as an etymology
  filter(value != "0") |> 
  mutate(source = str_replace(source, " ", "_"),
         present = 1) |> 
  # one 0/1 column per language, one row per stimulus-etymology pair
  pivot_wider(names_from = source, values_from = present, values_fill = 0) |> 
  # NOTE(review): the sample is taken per stimulus (one etymology row shared by
  # all languages), not separately for each language -- confirm this is intended
  group_by(number) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(ends_with("etym")) |> 
  rename(Bamana = Bamana_etym,
         Dan_Gweetaa = Dan_etym,
         Guro = Guro_etym,
         Kono = Kono_etym,
         Kpelle = Kpelle_etym,
         Looma = Looma_etym,
         Mano = Mano_etym) |> 
  # transpose so that languages are the rows compared by dist()
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo() |> 
  (\(tree) plot(tree,
                main = "Adposition etymology with random equivalents and common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# Dendrogram of adposition etymologies restricted to the stimuli in `to_keep`,
# keeping one randomly drawn etymology row per stimulus. The draw is not
# seeded, so the tree can differ from run to run.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(cols = ends_with("etym"),
               names_to = "source",
               values_to = "value") |> 
  na.omit() |> 
  filter(number %in% to_keep) |> 
  distinct() |> 
  # split ";"-separated cells into one row per etymology and tidy the blanks
  mutate(value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  mutate(value = str_squish(value)) |> 
  distinct() |> 
  # "0" is dropped here rather than counted as an etymology
  filter(value != "0") |> 
  mutate(source = str_replace(source, " ", "_"),
         present = 1) |> 
  # one 0/1 column per language, one row per stimulus-etymology pair
  pivot_wider(names_from = source, values_from = present, values_fill = 0) |> 
  # NOTE(review): the sample is taken per stimulus (one etymology row shared by
  # all languages), not separately for each language -- confirm this is intended
  group_by(number) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(ends_with("etym")) |> 
  rename(Bamana = Bamana_etym,
         Dan_Gweetaa = Dan_etym,
         Guro = Guro_etym,
         Kono = Kono_etym,
         Kpelle = Kpelle_etym,
         Looma = Looma_etym,
         Mano = Mano_etym) |> 
  # transpose so that languages are the rows compared by dist()
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo() |> 
  (\(tree) plot(tree,
                main = "Adposition etymology with random equivalents and common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# Dendrogram of adposition etymologies restricted to the stimuli in `to_keep`,
# keeping one randomly drawn etymology row per stimulus. The draw is not
# seeded, so the tree can differ from run to run.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(cols = ends_with("etym"),
               names_to = "source",
               values_to = "value") |> 
  na.omit() |> 
  filter(number %in% to_keep) |> 
  distinct() |> 
  # split ";"-separated cells into one row per etymology and tidy the blanks
  mutate(value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  mutate(value = str_squish(value)) |> 
  distinct() |> 
  # "0" is dropped here rather than counted as an etymology
  filter(value != "0") |> 
  mutate(source = str_replace(source, " ", "_"),
         present = 1) |> 
  # one 0/1 column per language, one row per stimulus-etymology pair
  pivot_wider(names_from = source, values_from = present, values_fill = 0) |> 
  # NOTE(review): the sample is taken per stimulus (one etymology row shared by
  # all languages), not separately for each language -- confirm this is intended
  group_by(number) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(ends_with("etym")) |> 
  rename(Bamana = Bamana_etym,
         Dan_Gweetaa = Dan_etym,
         Guro = Guro_etym,
         Kono = Kono_etym,
         Kpelle = Kpelle_etym,
         Looma = Looma_etym,
         Mano = Mano_etym) |> 
  # transpose so that languages are the rows compared by dist()
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo() |> 
  (\(tree) plot(tree,
                main = "Adposition etymology with random equivalents and common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# Dendrogram of adposition etymologies restricted to the stimuli in `to_keep`,
# keeping one randomly drawn etymology row per stimulus. The draw is not
# seeded, so the tree can differ from run to run.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(cols = ends_with("etym"),
               names_to = "source",
               values_to = "value") |> 
  na.omit() |> 
  filter(number %in% to_keep) |> 
  distinct() |> 
  # split ";"-separated cells into one row per etymology and tidy the blanks
  mutate(value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  mutate(value = str_squish(value)) |> 
  distinct() |> 
  # "0" is dropped here rather than counted as an etymology
  filter(value != "0") |> 
  mutate(source = str_replace(source, " ", "_"),
         present = 1) |> 
  # one 0/1 column per language, one row per stimulus-etymology pair
  pivot_wider(names_from = source, values_from = present, values_fill = 0) |> 
  # NOTE(review): the sample is taken per stimulus (one etymology row shared by
  # all languages), not separately for each language -- confirm this is intended
  group_by(number) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(ends_with("etym")) |> 
  rename(Bamana = Bamana_etym,
         Dan_Gweetaa = Dan_etym,
         Guro = Guro_etym,
         Kono = Kono_etym,
         Kpelle = Kpelle_etym,
         Looma = Looma_etym,
         Mano = Mano_etym) |> 
  # transpose so that languages are the rows compared by dist()
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo() |> 
  (\(tree) plot(tree,
                main = "Adposition etymology with random equivalents and common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# Dendrogram of adposition etymologies restricted to the stimuli in `to_keep`,
# keeping one randomly drawn etymology row per stimulus. The draw is not
# seeded, so the tree can differ from run to run.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(cols = ends_with("etym"),
               names_to = "source",
               values_to = "value") |> 
  na.omit() |> 
  filter(number %in% to_keep) |> 
  distinct() |> 
  # split ";"-separated cells into one row per etymology and tidy the blanks
  mutate(value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  mutate(value = str_squish(value)) |> 
  distinct() |> 
  # "0" is dropped here rather than counted as an etymology
  filter(value != "0") |> 
  mutate(source = str_replace(source, " ", "_"),
         present = 1) |> 
  # one 0/1 column per language, one row per stimulus-etymology pair
  pivot_wider(names_from = source, values_from = present, values_fill = 0) |> 
  # NOTE(review): the sample is taken per stimulus (one etymology row shared by
  # all languages), not separately for each language -- confirm this is intended
  group_by(number) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(ends_with("etym")) |> 
  rename(Bamana = Bamana_etym,
         Dan_Gweetaa = Dan_etym,
         Guro = Guro_etym,
         Kono = Kono_etym,
         Kpelle = Kpelle_etym,
         Looma = Looma_etym,
         Mano = Mano_etym) |> 
  # transpose so that languages are the rows compared by dist()
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo() |> 
  (\(tree) plot(tree,
                main = "Adposition etymology with random equivalents and common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# Dendrogram of adposition etymologies restricted to the stimuli in `to_keep`,
# keeping one randomly drawn etymology row per stimulus. The draw is not
# seeded, so the tree can differ from run to run.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(cols = ends_with("etym"),
               names_to = "source",
               values_to = "value") |> 
  na.omit() |> 
  filter(number %in% to_keep) |> 
  distinct() |> 
  # split ";"-separated cells into one row per etymology and tidy the blanks
  mutate(value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  mutate(value = str_squish(value)) |> 
  distinct() |> 
  # "0" is dropped here rather than counted as an etymology
  filter(value != "0") |> 
  mutate(source = str_replace(source, " ", "_"),
         present = 1) |> 
  # one 0/1 column per language, one row per stimulus-etymology pair
  pivot_wider(names_from = source, values_from = present, values_fill = 0) |> 
  # NOTE(review): the sample is taken per stimulus (one etymology row shared by
  # all languages), not separately for each language -- confirm this is intended
  group_by(number) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(ends_with("etym")) |> 
  rename(Bamana = Bamana_etym,
         Dan_Gweetaa = Dan_etym,
         Guro = Guro_etym,
         Kono = Kono_etym,
         Kpelle = Kpelle_etym,
         Looma = Looma_etym,
         Mano = Mano_etym) |> 
  # transpose so that languages are the rows compared by dist()
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo() |> 
  (\(tree) plot(tree,
                main = "Adposition etymology with random equivalents and common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# Dendrogram of adposition etymologies restricted to the stimuli in `to_keep`,
# keeping one randomly drawn etymology row per stimulus. The draw is not
# seeded, so the tree can differ from run to run.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(cols = ends_with("etym"),
               names_to = "source",
               values_to = "value") |> 
  na.omit() |> 
  filter(number %in% to_keep) |> 
  distinct() |> 
  # split ";"-separated cells into one row per etymology and tidy the blanks
  mutate(value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  mutate(value = str_squish(value)) |> 
  distinct() |> 
  # "0" is dropped here rather than counted as an etymology
  filter(value != "0") |> 
  mutate(source = str_replace(source, " ", "_"),
         present = 1) |> 
  # one 0/1 column per language, one row per stimulus-etymology pair
  pivot_wider(names_from = source, values_from = present, values_fill = 0) |> 
  # NOTE(review): the sample is taken per stimulus (one etymology row shared by
  # all languages), not separately for each language -- confirm this is intended
  group_by(number) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(ends_with("etym")) |> 
  rename(Bamana = Bamana_etym,
         Dan_Gweetaa = Dan_etym,
         Guro = Guro_etym,
         Kono = Kono_etym,
         Kpelle = Kpelle_etym,
         Looma = Looma_etym,
         Mano = Mano_etym) |> 
  # transpose so that languages are the rows compared by dist()
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo() |> 
  (\(tree) plot(tree,
                main = "Adposition etymology with random equivalents and common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# Dendrogram of adposition etymologies restricted to the stimuli in `to_keep`,
# keeping one randomly drawn etymology row per stimulus. The draw is not
# seeded, so the tree can differ from run to run.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(cols = ends_with("etym"),
               names_to = "source",
               values_to = "value") |> 
  na.omit() |> 
  filter(number %in% to_keep) |> 
  distinct() |> 
  # split ";"-separated cells into one row per etymology and tidy the blanks
  mutate(value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  mutate(value = str_squish(value)) |> 
  distinct() |> 
  # "0" is dropped here rather than counted as an etymology
  filter(value != "0") |> 
  mutate(source = str_replace(source, " ", "_"),
         present = 1) |> 
  # one 0/1 column per language, one row per stimulus-etymology pair
  pivot_wider(names_from = source, values_from = present, values_fill = 0) |> 
  # NOTE(review): the sample is taken per stimulus (one etymology row shared by
  # all languages), not separately for each language -- confirm this is intended
  group_by(number) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(ends_with("etym")) |> 
  rename(Bamana = Bamana_etym,
         Dan_Gweetaa = Dan_etym,
         Guro = Guro_etym,
         Kono = Kono_etym,
         Kpelle = Kpelle_etym,
         Looma = Looma_etym,
         Mano = Mano_etym) |> 
  # transpose so that languages are the rows compared by dist()
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo() |> 
  (\(tree) plot(tree,
                main = "Adposition etymology with random equivalents and common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# Dendrogram of adposition etymologies restricted to the stimuli in `to_keep`,
# keeping one randomly drawn etymology row per stimulus. The draw is not
# seeded, so the tree can differ from run to run.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(cols = ends_with("etym"),
               names_to = "source",
               values_to = "value") |> 
  na.omit() |> 
  filter(number %in% to_keep) |> 
  distinct() |> 
  # split ";"-separated cells into one row per etymology and tidy the blanks
  mutate(value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  mutate(value = str_squish(value)) |> 
  distinct() |> 
  # "0" is dropped here rather than counted as an etymology
  filter(value != "0") |> 
  mutate(source = str_replace(source, " ", "_"),
         present = 1) |> 
  # one 0/1 column per language, one row per stimulus-etymology pair
  pivot_wider(names_from = source, values_from = present, values_fill = 0) |> 
  # NOTE(review): the sample is taken per stimulus (one etymology row shared by
  # all languages), not separately for each language -- confirm this is intended
  group_by(number) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(ends_with("etym")) |> 
  rename(Bamana = Bamana_etym,
         Dan_Gweetaa = Dan_etym,
         Guro = Guro_etym,
         Kono = Kono_etym,
         Kpelle = Kpelle_etym,
         Looma = Looma_etym,
         Mano = Mano_etym) |> 
  # transpose so that languages are the rows compared by dist()
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo() |> 
  (\(tree) plot(tree,
                main = "Adposition etymology with random equivalents and common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# Dendrogram of adposition etymologies restricted to the stimuli in `to_keep`,
# keeping one randomly drawn etymology row per stimulus. The draw is not
# seeded, so the tree can differ from run to run.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(cols = ends_with("etym"),
               names_to = "source",
               values_to = "value") |> 
  na.omit() |> 
  filter(number %in% to_keep) |> 
  distinct() |> 
  # split ";"-separated cells into one row per etymology and tidy the blanks
  mutate(value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  mutate(value = str_squish(value)) |> 
  distinct() |> 
  # "0" is dropped here rather than counted as an etymology
  filter(value != "0") |> 
  mutate(source = str_replace(source, " ", "_"),
         present = 1) |> 
  # one 0/1 column per language, one row per stimulus-etymology pair
  pivot_wider(names_from = source, values_from = present, values_fill = 0) |> 
  # NOTE(review): the sample is taken per stimulus (one etymology row shared by
  # all languages), not separately for each language -- confirm this is intended
  group_by(number) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(ends_with("etym")) |> 
  rename(Bamana = Bamana_etym,
         Dan_Gweetaa = Dan_etym,
         Guro = Guro_etym,
         Kono = Kono_etym,
         Kpelle = Kpelle_etym,
         Looma = Looma_etym,
         Mano = Mano_etym) |> 
  # transpose so that languages are the rows compared by dist()
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo() |> 
  (\(tree) plot(tree,
                main = "Adposition etymology with random equivalents and common stimuli",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5,
                font = 2))()

Code
# Third run of the same analysis: dendrogram of the languages by adposition
# etymology for the stimuli in `to_keep`, one randomly drawn row per stimulus.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(cols = ends_with("etym"),
               names_to = "source", 
               values_to = "value") |> 
  filter(number %in% to_keep) |> 
  na.omit() |> 
  distinct() |> 
  # A cell may hold several ";"-separated etymologies: one row per piece,
  # whitespace-normalised so that identical pieces compare equal.
  mutate(value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  mutate(value = str_squish(value)) |> 
  distinct() |> 
  filter(value != "0") |> 
  # Presence/absence coding: one 0/1 column per "<Language>_etym" source.
  mutate(source = str_replace(source, " ", "_"),
         value2 = 1) |> 
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |> 
  # Unseeded random draw, so the result differs between runs.
  # NOTE(review): this keeps one row of the wide table per stimulus (a single
  # etymology value), not one equivalent per language -- confirm it is intended.
  group_by(number) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  select(ends_with("etym"))  |> 
  rename(all_of(c(Guro = "Guro_etym",
                  Looma = "Looma_etym",
                  Mano = "Mano_etym",
                  Kono = "Kono_etym",
                  Dan_Gweetaa = "Dan_etym",
                  Bamana = "Bamana_etym",
                  Kpelle = "Kpelle_etym"))) |> 
  # Languages become rows so that dist() compares languages with each other.
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  as.phylo() |> 
  (\(tree) plot(tree,
                main = "Adposition etymology with random equivalents and common stimuli",
                # tip colours are looked up by language name in `colors`
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5, 
                font = 2))()

Только стимулы, для которых есть заполненная этимология

К сожалению, таких строчек всего одна:

Code
# Show the stimuli that have no missing value in any selected column
# (identifier columns and every "... etym" column).
na.omit(
  select(et, number, predicate_eng, stimuli, ends_with("etym"))
)
number predicate_eng stimuli Guro etym Looma etym Mano etym Dan etym Kono etym Kpelle etym Bamana etym
3 be afraid of (P. has to go out of the house, but there is a dog barking in the yard). P. a peur du chien. palm of hand-degere postposition by-ba postposition-for-2-len; postposition with-3-ga postposition-for-1-gon postposition-on-1-ma postposition-on-1-ma eye-nya

Все этимологии:

Code
# Read the merged adposition table and build `etym`: binary distances between
# languages computed from presence/absence of each etymology in each stimulus.
et <- readxl::read_xlsx("../GM_merged_wide_adpositions_MK_0926.xlsx")
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(names_to = "source", 
               values_to = "value", 
               ends_with("etym")) |> 
  na.omit() |> 
  distinct() |> 
  # A cell may hold several ";"-separated etymologies: one row per piece.
  mutate(value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  # Trim the pieces BEFORE de-duplicating and dropping "0". Filtering the
  # unsplit cell let pieces such as " 0" through as features and kept
  # " x" and "x" apart as two different etymologies.
  mutate(value = str_squish(value)) |> 
  distinct() |> 
  filter(value != "0") |> 
  # Presence/absence coding: one 0/1 column per "<Language>_etym" source.
  mutate(value2 = 1,
         source = str_replace(source, " ", "_")) |> 
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |> 
  select(ends_with("etym"))  |> 
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Mano = Mano_etym,
         Kono = Kono_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |> 
  # Languages become rows so that dist() compares languages with each other.
  t() |> 
  dist(method = "binary") ->
  etym

# Dendrogram of the all-stimuli etymology distances (hclust() with its
# default settings); tip labels are coloured via the `colors` lookup table.
etym |> 
  hclust() |> 
  as.phylo() |> 
  (\(tree) plot(tree,
                main = "Adposition etymology (all stimuli)",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5, 
                font = 2))()

Code
# Network view of the same all-stimuli distance matrix, built with neighborNet().
plot(neighborNet(etym))
# The title is added afterwards on the already drawn plot.
title(main = "Adposition etymology (all stimuli)")

Кластеризация со случайным эквивалентом. Я запустил более 10 раз – ничего в структуре не поменялось, только длина ножек менялась.

Code
# Build `etym_random`: binary distances between languages when each language
# contributes ONE randomly drawn etymology per stimulus. The draw is unseeded,
# so the matrix differs from run to run.
et |> 
  select(number, predicate_eng, stimuli, ends_with("etym")) |> 
  pivot_longer(names_to = "source", 
               values_to = "value", 
               ends_with("etym")) |> 
  na.omit() |> 
  distinct() |> 
  # A cell may hold several ";"-separated etymologies: one row per piece.
  mutate(value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  # Trim the pieces BEFORE de-duplicating and dropping "0". Filtering the
  # unsplit cell let pieces such as " 0" through, so "0" could be drawn as a
  # language's random equivalent, and " x" / "x" counted as different values.
  mutate(value = str_squish(value)) |> 
  distinct() |> 
  filter(value != "0") |> 
  # One random equivalent per stimulus and language; slice_sample() replaces
  # the superseded sample_n(), as in the other sampling chunks of this report.
  group_by(number, source) |> 
  slice_sample(n = 1) |> 
  ungroup() |> 
  # Presence/absence coding: one 0/1 column per "<Language>_etym" source.
  mutate(value2 = 1,
         source = str_replace(source, " ", "_")) |>  
  pivot_wider(names_from = source, values_from = value2, values_fill = 0) |> 
  select(ends_with("etym")) |> 
  rename(Guro = Guro_etym,
         Looma = Looma_etym,
         Kono = Kono_etym,
         Mano = Mano_etym,
         Dan_Gweetaa = Dan_etym,
         Bamana = Bamana_etym,
         Kpelle = Kpelle_etym) |> 
  # Languages become rows so that dist() compares languages with each other.
  t() |> 
  dist(method = "binary") ->
  etym_random

# Dendrogram of the random-equivalent distances (hclust() with its default
# settings); tip labels are coloured via the `colors` lookup table.
etym_random |> 
  hclust() |> 
  as.phylo() |> 
  (\(tree) plot(tree,
                main = "Adposition etymology with random equivalents",
                tip.color = colors$color[match(tree$tip.label, colors$language)],
                direction = "downwards",
                cex = 1.5, 
                font = 2))()

Code
# Network view of the random-equivalent distance matrix, built with neighborNet().
plot(neighborNet(etym_random))
# The title is added afterwards on the already drawn plot.
title(main = "Adposition etymology with random equivalents")

Это сделано по следующей таблице:

Code
# Print the table behind the clustering: one row per stimulus and etymology,
# one column per language holding that etymology where the language has it.
et |> 
  select(number, predicate_eng, ends_with("etym")) |> 
  pivot_longer(names_to = "source", 
               values_to = "value", 
               ends_with("etym")) |> 
  na.omit() |> 
  distinct() |> 
  # Commas are treated as separators too, except in the six patterns below,
  # which keep their comma: they are parked as "|||", the remaining commas
  # become ";", and then "|||" is turned back into ",".
  mutate(value = str_replace_all(value, "able, be", "able||| be"),
         value = str_replace_all(value, "bear, give", "bear||| give"),
         value = str_replace_all(value, ", shut", "||| shut"),
         value = str_replace_all(value, "cry, weep", "cry||| weep"),
         value = str_replace_all(value, "shoot, sting", "shoot||| sting"),
         value = str_replace_all(value, "stuck, get", "stuck||| get"),
         value = str_replace_all(value, ",", ";"),
         value = str_replace_all(value, "\\|\\|\\|", ","),
         value = str_split(value, ";")) |> 
  unnest_longer(value) |> 
  # Trim the pieces BEFORE de-duplicating and dropping "0". Filtering the
  # unsplit cell let pieces such as " 0" through as table rows and put
  # " x" and "x" on separate rows.
  mutate(value = str_squish(value)) |> 
  distinct() |> 
  filter(value != "0") |> 
  # Copy the etymology so it can fill the language columns while `value`
  # still identifies (and sorts) the row.
  mutate(value2 = value) |> 
  pivot_wider(names_from = source, values_from = value2) |>
  arrange(number, predicate_eng, value) |> 
  select(-value)
number predicate_eng Dan etym Guro etym Looma etym Mano etym Kono etym Kpelle etym Bamana etym
1 hurt postposition with-3-ga NA NA NA NA NA NA
1 hurt postposition-on-1-ma NA NA NA NA NA NA
2 be sick with NA NA NA NA NA NA postposition at-la
2 be sick with postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA
3 be afraid of NA NA NA postposition with-3-ga NA NA NA
3 be afraid of NA NA NA NA NA NA eye-nya
3 be afraid of NA palm of hand-degere NA NA NA NA NA
3 be afraid of NA NA postposition by-ba NA NA NA NA
3 be afraid of postposition-for-1-gon NA NA NA NA NA NA
3 be afraid of NA NA NA postposition-for-2-len NA NA NA
3 be afraid of NA NA NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA
5 be sufficient to NA NA NA NA arm-2-je arm-2-je NA
5 be sufficient to NA NA NA NA hand hand NA
6 be like sb NA NA NA postposition by-ba NA NA NA
6 be like sb NA NA postposition with-3-ga NA NA NA NA
6 be like sb postposition-on-1-ma postposition-on-1-ma NA NA postposition-on-1-ma postposition-on-1-ma NA
6 be like sb NA NA NA NA NA NA side-3-fe
7 have trust before-dhie NA NA NA NA NA NA
7 have trust NA NA NA NA NA NA postposition at-la
7 have trust NA postposition-for-2-len NA NA NA NA NA
7 have trust NA NA NA postposition-on-1-ma NA postposition-on-1-ma NA
7 have trust NA NA NA surface-1-ta surface-1-ta surface-1-ta NA
9 see what postposition-on-1-ma NA NA NA NA NA NA
10 influence NA surface-1-ta NA NA NA NA NA
11 meet NA postposition with-2-yan NA NA NA NA NA
11 meet NA NA NA NA postposition with-3-ga postposition with-3-ga NA
11 meet postposition-on-1-ma NA NA postposition-on-1-ma NA NA NA
13 win NA postposition-for-2-len NA NA NA NA NA
13 win postposition-on-1-ma NA NA postposition-on-1-ma NA postposition-on-1-ma NA
13 win NA NA NA NA NA NA postposition-on-2-xan
14 leave NA NA NA NA NA NA belly-2-xono
14 leave NA NA NA NA NA NA stomach
15 chase NA postposition with-2-yan NA NA NA NA NA
15 chase NA NA NA postposition with-3-ga postposition with-3-ga postposition with-3-ga NA
17 say NA NA NA NA NA NA postposition with-2-yan
17 say postposition-for-2-len postposition-for-2-len NA postposition-for-2-len NA NA NA
17 say NA NA postposition-on-1-ma NA postposition-on-1-ma postposition-on-1-ma NA
18 hold NA NA NA NA arm-2-je arm-2-je NA
18 hold NA NA NA NA hand hand NA
18 hold NA NA NA postposition-on-1-ma NA NA NA
18 hold NA surface-1-ta NA NA NA NA NA
19 catch NA NA NA NA NA NA postposition at-la
19 catch NA postposition-on-1-ma NA NA NA NA NA
21 reach NA NA NA NA NA NA postposition at-la
21 reach NA postposition-for-2-len NA NA NA NA NA
21 reach NA NA NA NA postposition-on-1-ma postposition-on-1-ma NA
21 reach NA NA NA surface-1-ta NA NA NA
22 touch NA NA NA NA NA NA postposition at-la
22 touch NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA
23 fight sb NA postposition by-ba NA NA NA NA NA
23 fight sb postposition with-3-ga NA postposition with-3-ga NA NA NA NA
24 be friends with NA NA NA NA NA back-1-pulu NA
24 be friends with NA postposition with-2-yan NA NA NA NA NA
24 be friends with NA NA postposition with-3-ga NA postposition with-3-ga postposition with-3-ga NA
24 be friends with NA NA NA side-3-fe NA NA NA
25 think about sb NA NA NA NA NA NA postposition at-la
25 think about sb postposition in-3-gi NA NA NA NA NA NA
25 think about sb NA postposition-for-2-len NA NA NA NA NA
25 think about sb NA NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA
25 think about sb NA NA NA surface-1-ta NA NA NA
28 wait postposition-for-1-gon NA NA NA NA NA NA
29 forget NA NA NA NA NA NA back-3-xo
29 forget postposition with-3-ga NA NA NA NA NA NA
29 forget NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA
30 depend on NA NA NA NA NA NA postposition at-la
30 depend on NA postposition by-ba NA NA NA NA NA
30 depend on postposition-on-1-ma NA NA NA NA NA NA
35 avoid postposition with-3-ga NA NA NA NA NA NA
35 avoid NA postposition-on-1-ma NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA
37 mock NA NA postposition in-2-sun NA NA NA NA
37 mock postposition-on-1-ma NA NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA
38 have NA NA NA NA arm-2-je arm-2-je NA
38 have NA NA NA NA hand hand NA
38 have NA palm of hand-degere NA NA NA NA NA
38 have postposition-for-1-gon NA NA NA NA NA NA
38 have NA NA NA NA NA NA side-3-fe
39 seek NA NA NA postposition with-3-ga NA NA NA
40 paint NA postposition-on-1-ma NA NA NA postposition-on-1-ma NA
41 bite NA postposition-on-1-ma NA NA NA NA NA
42 be deprived NA NA NA postposition with-3-ga NA NA NA
42 be deprived NA NA NA NA NA NA postposition at-la
42 be deprived NA NA NA NA postposition in-2-sun postposition in-2-sun NA
42 be deprived NA NA NA postposition in-3-gi NA NA NA
42 be deprived NA NA postposition-on-1-ma NA NA NA NA
46 love NA NA NA postposition with-3-ga NA NA NA
46 love NA NA postposition-for-3-be NA NA NA NA
46 love NA postposition-on-1-ma NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA
46 love NA NA NA NA NA NA side-3-fe
48 dream NA NA NA postposition with-3-ga postposition with-3-ga postposition with-3-ga NA
50 put on NA postposition-on-1-ma NA NA NA NA NA
51 call smth NA NA postposition with-3-ga NA postposition with-3-ga postposition with-3-ga NA
51 call smth NA NA NA postposition-for-2-len NA NA NA
52 punish NA postposition in-3-gi NA NA NA NA NA
52 punish NA NA postposition-on-1-ma NA NA NA NA
52 punish surface-1-ta NA NA NA NA NA NA
53 attack NA NA NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma
53 attack NA NA NA NA NA NA postposition-on-2-xan
53 attack surface-1-ta surface-1-ta NA NA NA NA NA
54 be filled with smth NA NA NA NA NA NA postposition at-la
54 be filled with smth NA postposition with-2-yan NA NA NA NA NA
54 be filled with smth postposition with-3-ga NA postposition with-3-ga postposition with-3-ga postposition with-3-ga postposition with-3-ga NA
55 find smth postposition-on-1-ma NA NA NA NA NA NA
56 lack NA NA NA 0 NA NA NA
56 lack NA NA NA postposition with-3-ga NA NA NA
57 hate, detest NA NA NA postposition in-3-gi NA NA NA
57 hate, detest postposition-for-1-gon NA NA NA NA NA NA
57 hate, detest NA postposition-on-1-ma NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA
58 like NA NA NA side-3-fe NA NA NA
58 like NA NA NA 0 NA NA NA
58 like NA NA NA NA NA NA postposition with-2-yan
58 like NA postposition-for-2-len NA postposition-for-2-len NA NA NA
58 like NA NA postposition-for-3-be NA NA NA NA
58 like NA NA NA NA postposition-on-1-ma NA NA
59 need NA NA NA NA NA NA postposition at-la
59 need NA NA postposition by-ba NA NA NA NA
59 need postposition-on-1-ma NA NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA
60 surround around-zi around-zi NA NA NA NA NA
60 surround NA NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA NA
61 be left NA NA NA NA arm-2-je arm-2-je NA
61 be left NA NA NA NA hand hand NA
61 be left NA palm of hand-degere NA NA NA NA NA
62 respond NA NA NA NA postposition-on-1-ma NA NA
64 be different NA NA NA NA NA NA postposition at-la
64 be different postposition-on-1-ma postposition-on-1-ma NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA
65 fall behind NA NA back-1-pulu NA back-1-pulu back-1-pulu NA
65 fall behind NA NA NA NA NA NA back-3-xo
65 fall behind NA bottom-1-ju NA NA NA NA NA
65 fall behind NA NA NA side-3-fe NA NA NA
67 smell NA postposition with-2-yan NA NA NA NA NA
67 smell NA NA postposition-for-3-be NA NA NA NA
67 smell postposition-on-1-ma NA NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA
67 smell NA NA NA NA NA NA trace-1-pon
68 traverse, cross NA NA NA surface-1-ta NA NA NA
73 approach NA around-zi NA NA NA NA NA
73 approach postposition-on-1-ma NA NA NA NA NA postposition-on-1-ma
74 leave NA NA NA NA NA NA postposition at-la
75 75) cover surface-1-ta surface-1-ta NA NA NA NA NA
75 75) cover postposition-on-1-ma postposition-on-1-ma NA NA NA NA NA
75 75) cover NA surface-1-ta NA surface-1-ta NA NA NA
76 76) remember NA NA postposition in-2-sun NA NA NA NA
76 76) remember NA NA NA NA NA NA postposition at-la
76 76) remember NA postposition in-3-gi NA NA NA NA NA
76 76) remember postposition with-3-ga NA NA postposition with-3-ga NA NA NA
76 76) remember NA NA postposition-on-1-ma NA postposition-on-1-ma NA NA
76 76) remember NA NA NA surface-1-ta NA NA NA
76 76) remember NA NA NA NA NA surface-2-ga NA
77 77) help NA postposition by-ba postposition by-ba NA NA NA NA
77 77) help NA NA NA postposition-on-1-ma NA postposition-on-1-ma NA
77 77) help NA NA NA NA trace-1-pon NA NA
79 79) fall into NA NA NA NA NA NA postposition-on-2-xan
80 80) cut with NA postposition with-2-yan NA NA NA NA NA
80 80) cut with postposition with-3-ga NA postposition with-3-ga postposition with-3-ga postposition with-3-ga postposition with-3-ga NA
81 81) stick to smth NA NA postposition by-ba NA NA NA NA
81 81) stick to smth postposition-on-1-ma NA NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA
81 81) stick to smth NA surface-1-ta NA NA NA NA NA
82 82) lose to smb NA NA NA NA NA NA arm-1-bolo
82 82) lose to smb NA NA NA NA NA NA hand
82 82) lose to smb NA NA NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA
83 83) be happy about, se rejouir de NA postposition by-ba NA NA NA NA NA
83 83) be happy about, se rejouir de NA NA NA NA NA NA postposition at-la
83 83) be happy about, se rejouir de NA postposition-on-1-ma postposition-on-1-ma NA postposition-on-1-ma postposition-on-1-ma NA
84 84) speak with NA NA NA side-3-fe NA NA NA
84 84) speak with postposition-for-2-len NA NA postposition-for-2-len NA NA NA
84 84) speak with NA NA NA NA NA NA side-3-fe
85 85) give birth to NA NA NA postposition with-3-ga postposition with-3-ga postposition with-3-ga NA
86 86) let fall smth NA NA NA NA NA NA arm-1-bolo
86 86) let fall smth NA NA NA NA NA NA hand
87 87) diriger, gouverner before-dhie NA NA NA NA NA NA
87 87) diriger, gouverner NA NA NA NA NA NA postposition with-2-yan
88 88) miss postposition with-3-ga NA NA NA NA NA NA
88 88) miss NA NA NA NA NA NA postposition at-la
88 88) miss surface-1-ta NA NA NA NA NA NA
89 89) follow NA NA back-1-pulu NA NA back-1-pulu NA
89 89) follow NA bottom-1-ju NA NA NA NA NA
89 89) follow occiput-3-kee NA NA NA NA NA NA
89 89) follow NA NA NA NA NA NA postposition at-la
90 90) climb down NA NA NA NA NA NA postposition at-la
91 91) listen to smth postposition-on-1-ma NA postposition-on-1-ma postposition-on-1-ma NA postposition-on-1-ma NA
91 91) listen to smth NA NA NA NA NA NA side-3-fe
92 92) listen to smb NA NA NA NA NA NA postposition with-2-yan
92 92) listen to smb NA NA postposition-for-3-be NA NA NA NA
93 93) hear NA NA postposition-on-1-ma NA NA NA NA
94 94) mix up with smth NA postposition with-2-yan NA NA NA NA NA
94 94) mix up with smth postposition with-3-ga NA NA postposition with-3-ga postposition with-3-ga postposition with-3-ga NA
97 97) appear in a dream NA NA NA NA NA NA postposition at-la
97 97) appear in a dream postposition with-3-ga NA NA postposition with-3-ga postposition with-3-ga postposition with-3-ga NA
98 98) agree NA NA NA postposition by-ba NA NA NA
98 98) agree NA NA NA NA postposition in-2-sun NA NA
98 98) agree postposition-on-1-ma NA postposition-on-1-ma NA NA NA postposition-on-1-ma
99 99) dispute NA postposition by-ba NA NA NA NA NA
99 99) dispute NA postposition with-2-yan NA NA NA NA NA
99 99) dispute postposition with-3-ga NA NA NA NA NA NA
99 99) dispute NA NA NA postposition-on-1-ma NA NA NA
100 100) cost NA NA NA NA NA NA postposition with-2-yan
100 100) cost NA NA postposition with-3-ga postposition with-3-ga postposition with-3-ga postposition with-3-ga NA
101 101) shoot at NA postposition by-ba NA NA NA NA NA
101 101) shoot at postposition-on-1-ma NA postposition-on-1-ma postposition-on-1-ma NA postposition-on-1-ma NA
103 103) lose NA NA NA NA NA NA back-3-xo
104 104) drown NA NA bottom-2-bu NA NA NA NA
104 104) drown NA NA NA NA NA NA postposition at-la
104 104) drown postposition by-ba NA NA postposition by-ba NA NA NA
104 104) drown NA NA NA NA postposition in-2-sun postposition in-2-sun NA
107 107) kiss NA postposition-on-1-ma NA NA NA NA NA
110 110) respect postposition in-3-gi NA NA NA NA NA NA
110 110) respect NA NA postposition-on-1-ma NA NA NA NA
111 111) disdain NA NA NA NA NA NA postposition at-la
111 111) disdain postposition in-3-gi NA NA NA NA NA NA
111 111) disdain NA postposition-on-1-ma NA postposition-on-1-ma postposition-on-1-ma NA NA
111 111) disdain NA NA NA side-3-fe NA NA NA
111 111) disdain NA NA NA NA NA trace-1-pon NA
112 112) be happy about NA NA NA 0 NA NA NA
112 112) be happy about NA NA NA NA NA NA postposition at-la
112 112) be happy about NA NA NA NA NA NA postposition with-2-yan
112 112) be happy about NA NA postposition-on-1-ma NA NA NA NA
112 112) be happy about NA NA NA side-3-fe NA NA NA
112 112) be happy about NA NA NA NA trace-1-pon trace-1-pon NA
113 113) fall in love with NA NA NA NA NA NA postposition at-la
113 113) fall in love with postposition in-3-gi NA NA NA NA NA NA
113 113) fall in love with NA NA postposition with-3-ga NA NA NA NA
113 113) fall in love with NA NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA
114 114) trust in sb NA NA NA NA NA NA postposition at-la
114 114) trust in sb NA surface-1-ta NA NA surface-1-ta NA NA
115 115) have pity of NA NA NA NA NA NA postposition at-la
115 115) have pity of NA NA NA postposition-on-1-ma NA postposition-on-1-ma NA
116 116) be envious of postposition with-3-ga NA NA NA NA NA NA
116 116) be envious of NA postposition-on-1-ma NA postposition-on-1-ma NA postposition-on-1-ma NA
116 116) be envious of NA NA NA NA trace-1-pon NA NA
117 117) be angry with NA postposition by-ba NA NA NA NA NA
117 117) be angry with NA NA postposition-on-1-ma postposition-on-1-ma postposition-on-1-ma NA NA
117 117) be angry with side-3-fe NA NA NA NA NA NA
117 117) be angry with NA NA NA NA NA trace-1-pon NA
117 117) be angry with NA NA NA NA NA NA under-1-kodo
118 118) be surprised about NA NA NA NA NA NA postposition at-la
118 118) be surprised about postposition with-3-ga NA postposition with-3-ga postposition with-3-ga NA NA NA
118 118) be surprised about NA postposition-on-1-ma NA NA NA postposition-on-1-ma NA
119 119) love smth NA NA NA NA NA NA postposition with-2-yan
119 119) love smth NA NA NA postposition with-3-ga NA NA NA
119 119) love smth NA postposition-for-2-len NA NA NA NA NA
119 119) love smth NA NA NA NA postposition-on-1-ma postposition-on-1-ma NA
120 120) enjoy/ take pleasure in NA postposition with-2-yan NA NA NA NA NA
120 120) enjoy/ take pleasure in NA NA postposition with-3-ga NA NA NA NA
121 121) want NA postposition by-ba NA NA NA NA NA
121 121) want NA NA NA postposition with-3-ga NA NA NA
121 121) want NA NA postposition-on-1-ma NA NA NA NA
121 121) want side-3-fe NA NA side-3-fe NA NA side-3-fe
121 121) want NA NA NA NA trace-1-pon trace-1-pon NA
122 122) be angry with = 117? NA postposition by-ba NA NA NA NA NA
122 122) be angry with = 117? NA NA NA NA trace-1-pon NA NA
123 123) take offense by sb before-dhie NA NA NA NA NA NA
123 123) take offense by sb NA NA NA NA postposition in-2-sun NA NA
123 123) take offense by sb NA NA NA NA NA postposition with-3-ga NA
123 123) take offense by sb NA postposition-on-1-ma NA NA NA NA NA
124 124) make sad sb NA NA NA NA NA NA postposition at-la
124 124) make sad sb NA NA postposition-on-1-ma NA NA NA NA
124 124) make sad sb NA NA NA NA trace-1-pon NA NA
125 125) be surprised by NA postposition-on-1-ma NA NA postposition-on-1-ma NA NA
126 126) despise sb NA NA NA NA postposition-on-1-ma NA postposition-on-1-ma
127 127) be sad because of sb NA NA postposition by-ba NA NA NA NA
127 127) be sad because of sb NA NA NA postposition with-3-ga NA NA NA
127 127) be sad because of sb NA postposition-on-1-ma NA NA postposition-on-1-ma NA NA
127 127) be sad because of sb NA NA NA NA NA NA side-3-fe
128 128) be annoyed by sb NA NA NA mouth-1-da NA NA NA
128 128) be annoyed by sb postposition-for-2-len NA NA NA NA NA NA
128 128) be annoyed by sb NA postposition-on-1-ma NA NA NA NA NA
128 128) be annoyed by sb NA NA NA NA trace-1-pon NA NA
129 129) sympathise to sb NA NA NA NA NA NA postposition-on-1-ma
130 130) be embarrassed by sb NA NA NA 0 NA NA NA
130 130) be embarrassed by sb NA NA NA postposition with-3-ga NA NA NA
130 130) be embarrassed by sb NA NA postposition-on-1-ma NA NA postposition-on-1-ma NA

Если из двух компонентов совпал один, мы ставим 0.5; если из трёх совпал один, то 0.33; если из трёх совпали два, то 0.67. Если конструкций две, то учитываем максимальное совпадение.

Code
# Read the verb-meaning table and build `analyze_df`: one row per meaning
# component, keyed by "<number>_<id>" and by "<language>_<number of parts>".
complex_verbs <- readxl::read_xlsx("../GM_merged_wide_verb_meaning.xlsx")

complex_verbs |> 
  pivot_longer(cols = Guro:Kono,
               names_to = "language",
               values_to = "verb_meaning") |> 
  select(number, id, language, verb_meaning) |> 
  filter(!is.na(verb_meaning)) |> 
  # Drop parenthesised material, normalise whitespace, then split the
  # " ; "-separated alternatives into one row each.
  mutate(verb_meaning = verb_meaning |> 
           str_remove_all("\\(.*?\\)") |> 
           str_squish() |> 
           str_split(" ; ")) |>
  unnest_longer(verb_meaning) |> 
  # Up to four space-separated components; shorter meanings leave the
  # trailing p-columns NA.
  separate_wider_delim(verb_meaning, delim = " ",
                       names = c("p1", "p2", "p3", "p4"),
                       too_few = "align_start") |> 
  # Keep only meanings with at least two components.
  filter(!is.na(p2)) |> 
  # The first non-missing column from the right gives the component count.
  mutate(n_parts = case_when(!is.na(p4) ~ 4,
                             !is.na(p3) ~ 3,
                             !is.na(p2) ~ 2)) |> 
  pivot_longer(cols = p1:p4, names_to = "p", values_to = "form") |> 
  # Components containing "?" are blanked and removed together with the
  # empty p-slots by na.omit().
  mutate(form = case_when(str_detect(form, "\\?{1,3}") ~ NA,
                          .default = form)) |> 
  na.omit() |> 
  arrange(form, number, id) |> 
  mutate(language = str_c(language, "_", n_parts),
         number = str_c(number, "_", id)) |> 
  select(number, language, form) |> 
  arrange(number, language, form) ->
  analyze_df

# For every "<number>_<id>" key, count the meaning components shared by each
# pair of languages and scale the count by the larger construction size.
# All per-key tables are stacked into `result`.
analyze_df$number |> 
  unique() |> 
  sort() |> 
  map(\(stimulus_id) {
    analyze_df |> 
      filter(number == stimulus_id) |> 
      pairwise_count(language, form)  |> 
      # The part count is the digit appended to the language label; the
      # larger of the two counts in a pair is used as the denominator.
      mutate(total = case_when(str_detect(item1, "4") | str_detect(item2, "4") ~ 4,
                               str_detect(item1, "3") | str_detect(item2, "3") ~ 3, 
                               .default = 2),
             value = n/total,
             number = stimulus_id,
             item1 = str_remove(item1, "_\\d"),
             item2 = str_remove(item2, "_\\d"))  |> 
      # Once the suffix is removed, a language paired with itself
      # (same language, different part counts) is dropped.
      filter(item1 != item2) |> 
      select(number, item1, item2, value, total)
  }) |> 
  list_rbind() ->
  result

# Heatmap of the similarity scores summed over all keys for each language pair.
result |> 
  group_by(item1, item2) |> 
  summarise(value = sum(value),
            total = sum(total)) |> 
  ggplot(mapping = aes(x = item1, y = item2)) +
  geom_tile(mapping = aes(fill = value), colour = "white") +
  geom_text(mapping = aes(label = round(value, 3)), colour = "white") +
  scale_fill_gradient(low = "lightblue", high = "navy") +
  coord_fixed() +
  labs(x = "", y = "", title = "Heatmap with all stimuli") +
  theme(legend.position = "bottom")

Чем выше значение, тем больше сходство. У меня есть подозрение, что я где-то налажал (ну и я сейчас выкинул все, что я подозревал в ошибке), но оно говорит что-то не совсем дикое: огромное сходство кпелле и коно, большое сходство кпелле и мано… Чудеса…

Я преобразую все вот в такую таблицу. К language я привинчиваю количество частей. Так что, например, первые две строчки нужно читать так: в стимуле номер 1 в языке Dan_Gweetaa 2 части break и interior. Так как таблица отсортирована по номеру стимула, то больше сложных глаголов в первом стимуле нет.

Я сделал некоторый хак, который позволяет делать кластеризацию. Я завожу признаки, которые состоят из фрагмента значения и номера, в котором он встречается:

Code
# Presence/absence table: each feature is a meaning component tagged with the
# key it occurs in; each language column is 1 if the language has it, else 0.
analyze_df  |> 
  distinct(language = str_remove(language, "_\\d"),
           form = str_c(form, "_", number)) |> 
  mutate(value = 1) |> 
  pivot_wider(names_from = language, values_from = value, values_fill = 0)
form Bamana Dan_Gweetaa Guro Kpelle Looma Mano Kono
hit_101_1 1 0 0 0 0 0 0
powder_101_1 1 0 0 0 0 0 0
go.up_101_1 0 1 0 0 0 0 0
gun_101_1 0 1 0 1 1 0 0
bow_101_1 0 0 1 0 0 0 0
throw_101_1 0 0 1 0 0 0 0
fall_101_1 0 0 0 1 0 0 0
drop_101_1 0 0 0 0 1 1 0
fusil_101_1 0 0 0 0 0 1 0
implement_102_1 0 0 0 0 0 0 1
mouth_107_1 0 0 0 0 1 0 0
touch_107_1 0 0 0 0 1 0 0
enter_107_2 0 0 0 0 0 0 1
mouth_107_2 0 0 0 0 0 0 1
mouth_107_3 1 0 0 0 0 0 0
smell_107_3 1 0 0 0 0 0 0
say_108_1 0 1 0 0 0 0 0
voice_108_1 0 1 0 0 0 0 0
dire_108_1 0 0 1 0 0 0 0
surface_108_1 0 0 1 0 0 0 0
book_108_2 0 0 0 1 0 0 0
implement_108_2 0 0 0 1 0 0 0
implement_109_1 0 0 0 0 0 1 0
mouvement_109_1 0 0 0 0 0 1 0
put_on_10_1 0 0 1 0 0 0 0
work_10_1 0 0 1 0 0 0 0
surface_10_1 0 0 1 0 0 0 0
mouth_10_2 1 0 0 0 0 0 0
press_10_2 1 0 0 0 0 0 0
give_110_1 0 0 0 0 1 1 0
head_110_1 0 0 0 0 1 0 0
respect_110_1 0 0 0 0 0 1 0
be_heavy_110_2 0 0 0 1 0 0 1
prev_110_2 0 0 0 1 0 1 1
respect_110_2 0 0 0 0 0 1 0
sit_110_2 0 0 0 0 0 1 0
do_110_2 0 0 0 0 0 1 0
heavy_110_2 0 0 0 0 0 1 0
be_110_3 0 1 0 0 0 0 0
respect_110_3 0 1 0 0 0 0 0
accompany_111_1 0 0 1 0 0 0 0
implement_111_1 0 0 0 0 0 1 0
neglect_111_1 0 0 0 0 0 1 0
drop_111_2 0 0 0 0 0 1 0
eye_111_2 0 0 0 0 0 1 0
respect_111_2 0 0 0 0 0 1 0
spoil_111_2 0 0 0 0 0 1 0
be.neg_111_3 0 0 0 0 0 0 1
love_111_3 0 0 0 0 0 0 1
appear_111_4 0 1 0 0 0 0 0
disgust_111_4 0 1 0 0 0 0 0
chase_111_5 0 1 0 0 0 0 0
disgust_111_5 0 1 0 0 0 0 0
eye_112_1 1 0 0 0 0 0 0
fill_112_1 1 0 0 0 0 0 0
be_nice_112_2 0 0 0 0 1 0 1
stomach.loc_112_2 0 0 0 0 1 0 1
be_good_112_2 0 0 0 1 0 0 0
stomach_112_2 0 0 0 1 0 1 0
become_beautiful_112_2 0 0 0 0 0 1 0
go_up_113_1 0 1 0 0 0 0 0
love_113_1 0 1 0 1 0 1 1
fall_113_1 0 0 0 0 0 0 1
drop_113_1 0 0 0 1 0 1 0
do_115_1 0 0 0 0 0 1 0
pity_115_1 0 0 0 0 0 1 0
see_115_2 0 0 0 0 0 0 1
catch_115_2 0 0 0 0 1 0 0
pain_115_2 0 0 0 0 1 0 0
surface_115_2 0 0 0 0 1 0 0
do_115_2 0 0 0 0 0 1 0
pity_115_2 0 0 0 0 0 1 0
do_115_3 0 1 0 0 0 0 0
pity_115_3 0 1 0 0 0 0 0
be_115_4 0 0 0 1 0 0 0
eye_115_4 0 0 0 1 0 0 0
prev-love_115_4 0 0 0 1 0 0 0
do_116_1 0 1 0 0 0 0 0
jealousy_116_1 0 1 0 0 0 0 0
jelousy_116_1 0 0 1 0 0 0 0
wear_116_1 0 0 1 0 0 0 0
cut_116_2 0 0 0 0 0 1 0
heart_116_2 0 0 0 0 0 1 0
anger_117_1 0 1 0 0 0 0 0
take_117_1 0 1 0 0 0 0 0
heart_117_2 0 0 0 0 0 0 1
sting_117_2 0 0 0 0 0 0 1
attach_117_2 0 0 0 0 0 1 0
face_117_2 0 0 0 0 0 1 0
ball_117_2 0 0 0 0 0 1 0
boil_117_3 0 0 1 0 0 0 0
heart_117_3 0 0 1 1 1 0 0
sting_117_3 0 0 0 1 0 0 0
be_bitter_117_3 0 0 0 0 1 0 0
envelop_118_1 0 0 1 0 0 0 0
interior_118_1 0 0 1 0 0 0 0
on.it_118_1 0 0 0 1 0 0 0
stomach.loc_118_1 0 0 0 1 0 0 0
throw_118_1 0 0 0 1 0 1 0
be_dry_118_1 0 0 0 0 1 0 0
mouth_118_1 0 0 0 0 1 0 0
stomach_118_1 0 0 0 0 0 1 0
do_118_2 0 1 0 0 0 0 0
matter_118_2 0 1 0 0 0 0 0
surprise_118_2 0 1 0 0 0 0 0
with_118_2 0 1 0 0 0 0 0
envelop_118_3 0 0 1 0 0 0 0
interior_118_3 0 0 1 0 0 0 0
envelop_118_4 0 0 1 0 0 0 0
interior_118_4 0 0 1 0 0 0 0
do_119_1 0 1 0 0 0 0 0
love_119_1 0 1 0 0 0 0 0
be_119_2 0 0 0 1 0 0 1
love_119_2 0 0 0 1 0 0 1
profit_120_1 1 0 0 0 0 0 0
remove_120_1 1 0 0 0 0 0 0
see_120_1 0 1 0 0 0 0 0
taste_120_1 0 1 0 0 0 1 0
take_120_1 0 0 0 0 0 1 0
become_happy_120_2 0 0 1 0 0 0 0
interior_120_2 0 0 1 0 0 0 0
pleasure_120_2 0 0 0 0 1 0 0
remove_120_2 0 0 0 0 1 0 0
be_good_120_3 0 0 0 1 0 0 0
stomach_120_3 0 0 0 1 0 0 0
become_happy_121_1 0 0 1 0 0 0 0
interior_121_1 0 0 1 0 0 0 0
do_121_2 0 1 0 0 0 0 0
love_121_2 0 1 0 0 0 0 0
heart_122_1 0 0 0 0 0 0 1
sting_122_1 0 0 0 0 0 0 1
boil_122_2 0 0 1 0 0 0 0
heart_122_2 0 0 1 0 0 0 0
implement_123_1 0 0 0 0 0 0 1
passage_123_1 0 0 0 0 0 0 1
interior_123_2 0 0 0 0 0 1 0
search_123_2 0 0 0 0 0 1 0
speech_123_2 0 0 0 0 0 1 0
aggravate_123_3 0 0 1 0 0 0 0
speech_123_3 0 0 1 0 0 0 0
finish_124_1 0 0 0 0 0 0 1
enter_124_2 1 0 0 0 0 0 0
guilt_124_2 1 0 0 0 0 0 0
heart_124_3 0 1 0 0 0 0 0
wound_124_3 0 1 0 0 0 0 0
face_124_3 0 0 0 1 0 0 0
kill_124_3 0 0 0 1 0 0 0
pass_124_3 0 0 0 0 1 0 0
do_124_4 0 0 0 0 0 1 0
pitoyable_124_4 0 0 0 0 0 1 0
pain_124_5 0 0 0 0 1 0 0
remove_124_5 0 0 0 0 1 0 0
on.it_125_1 0 0 0 0 0 0 1
stomach.loc_125_1 0 0 0 0 0 0 1
throw_125_1 0 0 0 0 0 0 1
envelop_125_2 0 0 1 0 0 0 0
interior_125_2 0 0 1 0 0 0 0
envelop_125_3 0 0 1 0 0 0 0
interior_125_3 0 0 1 0 0 0 0
end_125_4 1 0 0 0 0 0 0
means_125_4 1 0 0 0 0 0 0
envelop_125_5 0 0 1 0 0 0 0
interior_125_5 0 0 1 0 0 0 0
implement_126_1 0 0 0 1 0 0 1
insult_126_1 0 0 0 0 0 0 1
pass_126_1 0 0 0 1 0 0 0
do_126_2 0 1 0 0 0 0 0
scorn_126_2 0 1 0 0 0 0 0
install_126_2 0 0 0 0 0 1 0
kill_126_2 0 0 0 0 0 1 0
cry_127_1 1 0 0 0 0 0 0
heart_127_1 1 0 0 0 0 0 0
pass_127_1 0 0 0 0 1 0 0
be_nice_127_2 0 0 0 0 0 0 1
stomach.loc_127_2 0 0 0 0 0 0 1
fire_127_3 0 0 1 0 0 0 0
install_127_3 0 0 1 0 0 0 0
interior_127_3 0 0 1 0 0 0 0
body_127_4 0 0 0 0 0 1 0
break_127_4 0 0 0 0 0 1 0
fill_127_5 0 0 0 0 0 1 0
mouth_127_5 0 0 0 0 0 1 0
heart_128_1 0 0 0 0 0 0 1
sting_128_1 0 0 0 0 0 0 1
boil_128_2 0 0 1 0 0 0 0
heart_128_2 0 0 1 0 0 0 0
implement_129_1 0 0 0 0 0 0 1
sit_129_1 0 0 0 0 0 0 1
cold_130_1 0 0 0 0 1 0 0
heart_130_1 0 0 0 0 1 0 0
inside_130_2 0 1 0 0 0 0 0
obey_130_2 0 1 0 0 0 0 0
bone_13_1 0 1 0 0 0 0 0
take_13_1 0 1 0 0 0 0 0
gain_13_2 1 0 0 0 0 0 0
power_13_2 1 0 0 0 0 0 0
do_13_3 0 1 0 0 0 0 0
head_13_3 0 1 0 0 0 0 0
hand_13_4 0 0 0 1 0 1 0
raise_13_4 0 0 0 1 0 0 0
be_able_13_4 0 0 0 0 0 1 0
prev_16_1 0 0 0 1 0 0 0
stretch_16_1 0 0 0 1 0 0 0
be_18_1 0 0 0 0 0 1 0
hand_18_1 0 0 0 0 0 1 0
break_1_1 0 1 0 0 0 0 0
interior_1_1 0 1 0 0 0 0 0
breast_20_1 0 0 0 1 1 0 1
milk_20_1 0 0 0 1 0 1 1
remove_20_1 0 0 0 1 1 1 1
cold_20_1 0 0 0 0 1 0 0
gain_21_1 0 0 0 1 0 0 0
prev_21_1 0 0 0 1 0 0 0
implement_21_1 0 0 0 0 0 1 0
hand_22_1 0 0 0 1 0 1 0
sit_22_1 0 0 0 1 0 0 0
stretch_22_1 0 0 0 1 0 0 0
touch_22_1 0 0 0 0 0 1 0
fight_23_1 0 1 1 0 0 0 0
wage_23_1 0 1 0 0 0 0 0
sell_23_1 0 0 1 0 0 0 0
battle_23_2 0 0 0 1 0 1 0
fight_23_2 0 0 0 1 0 1 0
do_24_1 0 0 0 0 0 1 0
friendship_24_1 0 0 0 0 0 1 0
be_24_2 0 0 1 0 0 0 0
friend_24_2 0 0 1 0 0 0 0
do_25_1 0 1 0 0 0 0 0
time_25_1 0 1 0 0 0 0 0
mind_25_1 0 0 0 0 1 0 0
sit_25_1 0 0 0 0 1 0 0
interior_25_2 0 0 1 0 0 0 0
remember_25_2 0 0 1 0 0 0 0
stomach_25_2 0 0 1 0 0 0 0
spirit_25_2 0 0 0 1 0 1 1
surface_25_2 0 0 0 1 0 0 1
walk_25_2 0 0 0 1 0 1 1
in_front_of_25_2 0 0 0 0 0 1 0
come_25_3 0 0 0 0 0 1 0
memory_25_3 0 0 0 0 0 1 0
eye_28_1 0 1 0 0 0 0 0
leave_28_1 0 1 0 0 0 0 0
in_front_of_28_2 0 0 1 0 0 0 0
wait_28_2 0 0 1 0 0 0 0
prev_28_2 0 0 0 1 0 1 1
unite_28_2 0 0 0 1 0 1 1
appear_29_1 0 1 0 0 0 0 0
heart_29_1 0 1 0 0 0 1 0
be_mistaken_29_1 0 0 1 0 0 0 0
interior_29_1 0 0 1 0 0 0 0
forget_29_1 0 0 0 0 0 1 0
call_31_1 0 0 0 0 0 1 0
do_31_1 0 0 0 0 0 1 0
implement_35_1 0 0 0 1 0 0 0
back_35_1 0 0 0 0 0 1 0
put_in_35_1 0 0 0 0 0 1 0
lie_35_2 0 0 0 1 1 0 0
prev_35_2 0 0 0 1 0 1 0
surface_35_2 0 0 0 0 1 0 0
put_on_35_2 0 0 0 0 0 1 0
do_36_1 0 1 0 0 0 1 0
thing:CSTR_36_1 0 1 0 0 0 0 0
thing_36_1 0 0 0 0 0 1 0
install_37_1 0 1 0 0 0 0 0
mockery_37_1 0 1 0 0 0 0 0
split_37_1 0 1 0 0 0 0 0
laughter_37_1 0 0 0 0 1 0 0
remove_37_1 0 0 0 0 1 0 0
mockery_37_2 1 0 0 0 0 0 0
remove_37_2 1 0 0 0 0 0 0
tithe_37_2 1 0 0 0 0 0 0
mockery_37_3 1 0 0 0 0 0 0
remove_37_3 1 0 0 0 0 0 0
tithe_37_3 1 0 0 0 0 0 0
eye_39_1 0 0 0 0 0 1 0
put_on_39_1 0 0 0 0 0 1 0
walk_39_1 0 0 0 0 0 1 0
frighten_3_1 0 0 0 0 0 1 0
prev_3_1 0 0 0 0 0 1 0
kaolin_40_1 0 0 1 0 0 0 0
spread_40_1 0 0 1 0 0 0 0
water_40_1 0 0 1 0 0 0 0
go_up_40_1 0 0 0 1 0 0 0
painting_40_1 0 0 0 1 0 0 0
go_up_40_2 0 0 0 0 0 0 1
prev_40_2 0 0 0 0 0 0 1
install_41_1 0 0 1 0 0 0 0
tooth_41_1 0 0 1 0 0 0 0
install_41_2 0 0 0 0 0 1 0
tooth_41_2 0 0 0 0 0 1 0
break_44_1 0 0 0 1 0 0 1
interior_44_1 0 0 0 1 0 0 1
do_45_1 0 1 0 0 0 0 0
falsehood_45_1 0 1 0 0 0 0 0
praise_45_1 0 0 0 0 0 0 1
prev_45_1 0 0 0 0 0 1 1
go_up_45_1 0 0 0 1 0 0 0
prev2_45_1 0 0 0 1 0 0 0
monter_45_1 0 0 0 0 0 1 0
do_46_1 0 0 0 0 0 1 0
love_46_1 0 0 0 0 0 1 0
accompany_46_2 0 0 1 0 0 0 0
heart_46_2 0 0 1 0 0 0 0
do_46_3 0 0 0 0 0 1 0
face_46_3 0 0 0 0 0 1 0
love_46_3 0 0 0 0 0 1 0
matter_46_3 0 0 0 0 0 1 0
remove_46_3 0 0 0 0 0 1 0
do_46_4 0 1 0 0 0 0 0
love_46_4 0 1 0 0 0 0 0
be_46_5 0 0 0 1 0 1 1
love_46_5 0 0 0 1 0 1 1
implement_47_1 0 0 0 0 0 1 0
movement_47_1 0 0 0 0 0 1 0
dream_48_1 0 0 0 1 0 1 1
implement_48_1 0 0 0 1 0 0 1
kill_48_1 0 0 0 0 0 1 0
interior_49_1 0 0 0 0 0 0 1
wash_49_1 0 0 0 1 0 1 1
prev_49_1 0 0 0 1 0 1 0
be_51_1 0 0 0 0 0 1 0
name_51_1 0 0 0 0 0 1 0
be_51_2 0 0 0 1 0 0 1
name_51_2 0 0 0 1 0 0 1
lie_52_1 0 1 0 0 1 0 0
matter_52_1 0 1 1 0 0 0 0
in_front_of_52_1 0 0 1 0 0 0 0
install_52_1 0 0 1 0 0 0 0
law_52_1 0 0 0 0 1 0 0
mouth_54_1 0 0 0 0 1 0 0
fill_54_2 0 0 0 1 0 1 1
mouth_54_2 0 0 0 1 0 1 1
interior_54_2 0 0 0 0 0 1 0
completeness_56_1 0 0 0 0 0 1 0
cop.NEG_56_1 0 0 0 0 0 1 0
do_57_1 0 1 0 0 0 0 0
repugnance_57_1 0 1 0 0 0 0 0
take_57_2 0 0 0 0 1 0 0
boil_57_3 0 0 1 0 0 0 0
heart_57_3 0 0 1 0 0 0 0
end_57_4 0 0 0 1 0 0 0
love_57_4 0 0 0 1 0 0 0
be.neg_57_5 0 0 0 1 0 1 1
love_57_5 0 0 0 1 0 1 1
catch_58_1 0 0 0 1 0 1 0
heart_58_1 0 0 0 1 0 1 0
drop_58_1 0 0 0 0 0 1 0
catch_58_2 0 0 0 0 0 1 0
heart_58_2 0 0 0 0 0 1 0
do_58_3 0 1 0 0 0 0 0
love_58_3 0 1 0 0 0 0 0
be_58_4 0 0 0 0 0 0 1
love_58_4 0 0 0 0 0 0 1
be_59_1 0 0 0 1 0 1 1
prev-issue_59_1 0 0 0 0 0 0 1
prev-be_obliged_59_1 0 0 0 1 0 0 0
need_59_1 0 0 0 0 0 1 0
catch_59_2 0 1 0 0 0 0 0
need_59_2 0 1 0 0 0 0 0
be_59_3 0 0 0 1 0 1 0
prev-be_obliged_59_3 0 0 0 1 0 0 0
need_59_3 0 0 0 0 0 1 0
prev_60_1 0 0 0 1 0 1 0
turn_60_1 0 0 0 1 0 1 0
implement_62_1 0 0 0 0 0 0 1
back_62_2 0 0 0 1 0 0 0
prev2_62_2 0 0 0 1 0 0 0
return_62_2 0 0 0 1 0 0 0
speech_62_2 0 0 0 1 0 1 0
accept_62_2 0 0 0 0 0 1 0
interior_62_2 0 0 0 0 0 1 0
remove_62_2 0 0 0 0 0 1 0
surface_62_2 0 0 0 0 0 1 0
turn_62_2 0 0 0 0 0 1 0
answer_62_3 0 1 0 0 0 0 0
appear_62_3 0 1 0 0 0 0 0
question_62_3 0 1 0 0 0 0 0
board_63_1 1 0 0 0 0 0 0
open_63_1 1 0 0 0 0 0 0
enlever_63_1 0 0 1 0 0 0 0
in_front_of_63_1 0 0 1 0 0 0 0
implement_63_1 0 0 0 1 0 0 1
mouth_63_1 0 0 0 1 1 1 1
stand_63_1 0 0 0 0 1 0 0
remove_63_1 0 0 0 0 0 1 0
be_64_1 0 0 0 0 0 1 0
différent_64_1 0 0 0 0 0 1 0
field_66_1 0 0 0 0 0 0 1
make_66_1 0 0 0 0 0 0 1
cultivate_66_1 0 0 0 1 0 0 0
place_66_1 0 0 0 1 0 1 0
remove_66_1 0 0 0 0 0 1 0
do_66_2 0 1 0 0 0 0 0
field_66_2 0 1 0 0 0 0 0
install_67_1 0 0 1 0 0 0 0
smell_67_1 0 0 1 0 0 0 0
surface_67_1 0 0 1 0 0 0 0
be_67_2 0 0 0 1 0 1 1
smell_67_2 0 0 0 1 0 1 1
cross_68_1 0 0 0 0 0 0 1
interior_68_1 0 0 0 0 0 0 1
pass_68_1 0 0 0 0 1 0 0
surface_68_1 0 0 0 0 1 0 0
cut_68_1 0 0 0 0 0 1 0
remove_69_1 0 1 0 0 0 0 0
song_69_1 0 1 0 1 1 1 1
fall_69_1 0 0 0 1 0 0 1
drop_69_1 0 0 0 0 1 0 0
implement_69_1 0 0 0 0 0 1 0
kill_70_1 0 1 0 0 0 0 0
writing_70_1 0 1 0 0 0 0 0
drop_72_1 0 0 1 0 0 0 0
water_72_1 0 0 1 0 0 0 0
encounter_73_1 1 0 0 0 0 0 0
measure_73_1 1 0 0 0 0 0 0
catch_73_2 0 0 0 1 0 0 0
prev_73_2 0 0 0 1 0 0 0
lie_75_1 0 0 0 0 0 0 1
stretch_75_1 0 0 0 0 0 0 1
cover_75_2 1 0 0 0 0 0 0
mouth_75_2 1 0 0 0 0 0 0
in_front_of_75_2 0 0 1 0 0 0 0
unite_75_2 0 0 1 0 0 0 0
close_75_2 0 0 0 1 0 0 0
upper_surface_75_2 0 0 0 1 0 0 0
mind_76_1 0 0 0 0 1 0 0
sit_76_1 0 0 0 0 1 0 0
spirit_76_2 0 0 0 0 0 0 1
surface_76_2 0 0 0 0 0 0 1
walk_76_2 0 0 0 0 0 0 1
heart_76_2 0 0 0 0 0 1 0
wake_76_2 0 0 0 0 0 1 0
memory_76_3 0 0 0 0 0 1 0
take_76_3 0 0 0 0 0 1 0
descend_76_4 1 0 0 0 0 0 0
reason_76_4 1 0 0 0 0 0 0
heart_76_4 0 1 0 0 0 0 0
wake_76_4 0 1 0 0 0 0 0
be_76_5 0 0 0 1 0 0 0
spirit_76_5 0 0 0 1 0 0 0
come_76_6 0 0 0 0 0 1 0
memory_76_6 0 0 0 0 0 1 0
give_77_1 0 0 0 1 0 0 1
help_77_1 0 0 0 1 0 0 1
prev_77_1 0 0 0 0 0 1 0
unite_77_1 0 0 0 0 0 1 0
catch_77_2 0 1 0 0 0 0 0
surface_77_2 0 1 0 0 0 0 0
implement_77_2 0 0 0 1 0 0 0
prev_77_2 0 0 0 1 0 0 0
prev-help_77_2 0 0 0 1 0 0 0
hear_78_1 0 1 0 1 1 0 0
inside_78_1 0 1 0 0 0 0 0
speech_78_1 0 0 0 1 0 0 0
voice_78_1 0 0 0 0 1 0 0
belief_7_1 0 1 0 0 0 0 0
implement_7_1 0 1 0 0 0 0 0
heart_7_2 0 0 1 0 0 1 0
put_in_7_2 0 0 1 0 0 0 0
put_on_7_2 0 0 1 0 0 0 0
install_7_2 0 0 0 1 0 1 0
spirit_7_2 0 0 0 1 0 0 0
heart_7_3 0 0 0 0 0 1 0
install_7_3 0 0 0 0 0 1 0
collect_82_1 1 0 0 0 0 0 0
defeat_82_1 1 0 0 0 0 0 0
be_able_82_2 0 0 0 1 0 1 0
hand_82_2 0 0 0 1 0 1 0
in_83_1 1 0 0 0 0 0 0
please_83_1 1 0 0 0 0 0 0
soul_83_1 1 0 0 0 0 0 0
do_83_2 0 1 0 0 0 0 0
satisfaction_83_2 0 1 0 0 0 0 0
become_happy_83_2 0 0 1 0 0 0 0
interior_83_2 0 0 1 0 0 0 0
be_nice_83_2 0 0 0 1 1 0 1
stomach.loc_83_2 0 0 0 1 1 0 1
become_beautiful_83_2 0 0 0 0 0 1 0
stomach_83_2 0 0 0 0 0 1 0
in_83_3 1 0 0 0 0 0 0
please_83_3 1 0 0 0 0 0 0
soul_83_3 1 0 0 0 0 0 0
catch_83_3 0 1 0 0 0 0 0
heart_83_3 0 1 0 0 0 0 0
kill_84_1 0 1 0 0 0 0 0
talk_84_1 0 1 0 0 0 0 0
implement_84_1 0 0 0 0 0 1 0
jugement_84_1 0 0 0 0 0 1 0
install_84_2 0 0 0 1 0 1 1
talk_84_2 0 0 0 1 0 0 1
do_84_2 0 0 0 1 0 0 0
implement_84_2 0 0 0 1 0 0 0
chat_84_2 0 0 0 0 0 1 0
descend_85_1 0 0 0 1 0 0 1
mouth_85_1 0 0 0 1 0 0 1
implement_87_1 0 0 0 1 0 0 0
prev2-speech_87_1 0 0 0 1 0 0 0
surface_87_1 0 0 0 0 0 1 0
take_87_1 0 0 0 0 0 1 0
interior_87_1 0 0 0 0 0 1 0
say_87_1 0 0 0 0 0 1 0
speech_87_1 0 0 0 0 0 1 0
heart_88_1 0 1 0 0 0 0 0
implement_88_1 0 1 0 0 0 0 0
catch_88_2 0 0 0 1 0 1 1
love_88_2 0 0 0 1 0 1 1
behind_89_1 0 0 0 0 0 1 0
chase_89_1 0 0 0 0 0 1 0
down_90_1 0 1 0 0 0 0 0
go_90_1 0 1 0 0 0 0 0
ear_91_1 0 0 0 0 1 0 0
stand_91_1 0 0 0 0 1 0 0
ear_91_2 1 1 0 1 0 1 0
prev-stand_91_2 1 0 0 0 0 0 0
leave_91_2 0 1 0 0 0 0 0
install_91_2 0 0 0 1 0 1 0
ear_91_3 0 0 0 0 0 0 1
install_91_3 0 0 0 0 0 0 1
hear_91_3 0 0 0 1 0 0 0
speech_91_3 0 0 0 1 0 0 0
respect_92_1 0 1 0 0 0 0 0
sit_92_1 0 1 0 0 0 0 0
ear_93_1 0 0 0 0 1 0 0
stand_93_1 0 0 0 0 1 0 0
hear_93_2 0 0 0 1 0 0 0
speech_93_2 0 0 0 1 0 1 0
entendre_93_2 0 0 0 0 0 1 0
finish_95_1 0 0 0 1 0 1 0
prev_95_1 0 0 0 1 0 1 0
dream_97_1 0 0 0 1 0 1 1
implement_97_1 0 0 0 1 0 0 1
kill_97_1 0 0 0 0 0 1 0
dream_97_2 0 1 0 0 0 0 0
remove_97_2 0 1 0 0 0 0 0
lie_98_1 0 0 0 0 1 0 0
speech_98_1 0 0 0 0 1 0 0
install_98_2 0 0 0 0 0 1 0
speech_98_2 0 0 0 0 0 1 0
install_98_3 0 0 0 0 0 1 0
speech_98_3 0 0 0 0 0 1 0
put_99_1 0 1 0 0 0 0 0
quarrel_99_1 0 1 0 0 0 0 0
make_99_2 0 0 0 1 0 0 1
work_99_2 0 0 0 0 0 0 1
argument_99_2 0 0 0 1 0 0 0
do_99_2 0 0 0 0 0 1 0
install_99_2 0 0 0 0 0 1 0
speech_99_2 0 0 0 0 0 1 0
impertinence_99_3 0 0 0 0 0 1 0
install_99_3 0 0 0 0 0 1 0

Все примеры, без фильтрации

Code
# Pairwise binary distance between languages over all stimuli.
# Each form is tagged with its stimulus number, the "_<digit>" suffix is
# stripped from the language label, and the unique (language, form) pairs are
# spread into a 0/1 table with one row per form and one column per language.
# The table is transposed so that dist() compares languages.
complex_verbs_distance <- analyze_df |>
  mutate(
    form = str_c(form, "_", number),
    language = str_remove(language, "_\\d")
  ) |>
  distinct(language, form) |>
  mutate(value = 1) |>
  pivot_wider(
    names_from = language,
    values_from = value,
    values_fill = 0
  ) |>
  column_to_rownames("form") |>
  t() |>
  dist(method = "binary")
Code
# Hierarchical clustering of the languages, drawn as a downward-growing tree.
# Tip labels are coloured by matching each label against colors$language.
complex_verbs_distance |>
  hclust() |>
  as.phylo() |>
  (\(tree) {
    tip_colours <- colors$color[match(tree$tip.label, colors$language)]
    plot(tree,
         main = "Complex verbs semantics (all stimuli)",
         tip.color = tip_colours,
         direction = "downwards",
         cex = 1.5,
         font = 2)
  })()

Code
# Build a neighbor-net from the distance object and plot it; the title is
# added in a separate call because plot() is invoked without one.
plot(neighborNet(complex_verbs_distance))
title(main = "Complex verbs semantics (all stimuli)")

Конечно, при таком варианте получается много единичных случаев, которые удлиняют палки и в кластеризации, и в нейборнете, однако таких случаев, когда во всех семи языках есть форма, мало.

Только стимулы, для которых есть все языки

Что, если посмотреть только на те стимулы, для которых в каждом языке есть какая-то форма (сложный глагол или нет):

Code
# Pairwise binary distance between languages, restricted to the stimuli listed
# in `to_keep`. `old_number` is the first run of 1-3 digits that is followed by
# an underscore in `number`, with that underscore removed; rows are kept only
# when this value occurs in `to_keep`. The remaining steps build a 0/1
# form-by-language table and transpose it so that dist() compares languages.
complex_verbs_distance_subset <- analyze_df |>
  mutate(
    old_number = str_remove(str_extract(number, "\\d{1,3}_"), "_")
  ) |>
  filter(old_number %in% to_keep) |>
  mutate(
    form = str_c(form, "_", number),
    language = str_remove(language, "_\\d")
  ) |>
  distinct(language, form) |>
  mutate(value = 1) |>
  pivot_wider(
    names_from = language,
    values_from = value,
    values_fill = 0
  ) |>
  column_to_rownames("form") |>
  t() |>
  dist(method = "binary")
Code
# Hierarchical clustering of the languages on the subset distances, drawn as a
# downward-growing tree. Tip labels are coloured by matching each label against
# colors$language.
complex_verbs_distance_subset |>
  hclust() |>
  as.phylo() |>
  (\(tree) {
    tip_colours <- colors$color[match(tree$tip.label, colors$language)]
    plot(tree,
         main = "Complex verbs semantics with common stimuli",
         tip.color = tip_colours,
         direction = "downwards",
         cex = 1.5,
         font = 2)
  })()

Code
# Build a neighbor-net from the subset distance object and plot it; the title
# is added in a separate call because plot() is invoked without one.
plot(neighborNet(complex_verbs_distance_subset))
title(main = "Complex verbs semantics with common stimuli")

Только стимулы, для которых есть все языки, случайная этимология для каждого языка

Code
# Draw one random row of `complex_verbs` per stimulus number, break its glosses
# into component forms, and plot a dendrogram of the languages based on which
# forms they share. No seed is set inside this chunk, so slice_sample() may
# pick different rows on each run (unless a seed is fixed elsewhere).
complex_verbs |> 
  filter(number %in% to_keep) |> 
  group_by(number) |> 
  slice_sample(n = 1) |> 
  # columns Guro..Kono become (language, verb_meaning) pairs
  pivot_longer(names_to = "language", values_to = "verb_meaning", Guro:Kono) |> 
  filter(!is.na(verb_meaning)) |> 
  select(number, id, language, verb_meaning) |> 
  # drop parenthesised text, normalise whitespace, split alternatives on " ; "
  mutate(verb_meaning = str_remove_all(verb_meaning, "\\(.*?\\)"),
         verb_meaning = str_squish(verb_meaning),
         verb_meaning = str_split(verb_meaning, " ; ")) |>
  unnest_longer(verb_meaning) |> 
  # split each gloss on spaces into p1..p4; glosses with fewer pieces get NA
  # in the trailing columns
  separate_wider_delim(verb_meaning, delim = " ",
                       names = c("p1", "p2", "p3", "p4"),
                       too_few = "align_start") |> 
  # keep only glosses that consist of at least two pieces
  filter(!is.na(p2)) |> 
  # n_parts = number of non-missing pieces (2, 3 or 4)
  mutate(n_parts = case_when(!is.na(p4) ~ 4,
                             is.na(p4) & !is.na(p3) ~ 3,
                             is.na(p3) & !is.na(p2) ~ 2)) |> 
  pivot_longer(names_to = "p", values_to = "form", p1:p4) |> 
  # pieces containing "?" are set to NA and removed by na.omit() together with
  # every other row that has an NA in any column
  mutate(form = case_when(str_detect(form, "\\?{1,3}") ~ NA,
                          TRUE ~ form)) |> 
  na.omit() |> 
  arrange(form, number, id) |> 
  # NOTE(review): the "_<n_parts>" suffix added here is stripped again by
  # str_remove(language, "_\\d") a few steps below
  mutate(language = str_c(language, "_", n_parts)) |> 
  select(number, id, language, form) |> 
  arrange(number, id, language, form) |> 
  mutate(form = str_c(form, "_", number),
         language = str_remove(language, "_\\d")) |> 
  distinct(language, form) |> 
  mutate(value = 1) |> 
  # 0/1 table: one row per form, one column per language
  pivot_wider(names_from = language, values_from = value, values_fill = 0) |> 
  column_to_rownames("form") |>
  # `number` is still a column at this point (presumably carried along as the
  # grouping variable); remove it so only the language columns are transposed
  select(-number) |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  # magrittr pipe so that `.` (the tree) can be referenced inside tip.color
  as.phylo()  %>%
  plot(main = "Complex verbs semantics with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# Draw one random row of `complex_verbs` per stimulus number, break its glosses
# into component forms, and plot a dendrogram of the languages based on which
# forms they share. No seed is set inside this chunk, so slice_sample() may
# pick different rows on each run (unless a seed is fixed elsewhere).
complex_verbs |> 
  filter(number %in% to_keep) |> 
  group_by(number) |> 
  slice_sample(n = 1) |> 
  # columns Guro..Kono become (language, verb_meaning) pairs
  pivot_longer(names_to = "language", values_to = "verb_meaning", Guro:Kono) |> 
  filter(!is.na(verb_meaning)) |> 
  select(number, id, language, verb_meaning) |> 
  # drop parenthesised text, normalise whitespace, split alternatives on " ; "
  mutate(verb_meaning = str_remove_all(verb_meaning, "\\(.*?\\)"),
         verb_meaning = str_squish(verb_meaning),
         verb_meaning = str_split(verb_meaning, " ; ")) |>
  unnest_longer(verb_meaning) |> 
  # split each gloss on spaces into p1..p4; glosses with fewer pieces get NA
  # in the trailing columns
  separate_wider_delim(verb_meaning, delim = " ",
                       names = c("p1", "p2", "p3", "p4"),
                       too_few = "align_start") |> 
  # keep only glosses that consist of at least two pieces
  filter(!is.na(p2)) |> 
  # n_parts = number of non-missing pieces (2, 3 or 4)
  mutate(n_parts = case_when(!is.na(p4) ~ 4,
                             is.na(p4) & !is.na(p3) ~ 3,
                             is.na(p3) & !is.na(p2) ~ 2)) |> 
  pivot_longer(names_to = "p", values_to = "form", p1:p4) |> 
  # pieces containing "?" are set to NA and removed by na.omit() together with
  # every other row that has an NA in any column
  mutate(form = case_when(str_detect(form, "\\?{1,3}") ~ NA,
                          TRUE ~ form)) |> 
  na.omit() |> 
  arrange(form, number, id) |> 
  # NOTE(review): the "_<n_parts>" suffix added here is stripped again by
  # str_remove(language, "_\\d") a few steps below
  mutate(language = str_c(language, "_", n_parts)) |> 
  select(number, id, language, form) |> 
  arrange(number, id, language, form) |> 
  mutate(form = str_c(form, "_", number),
         language = str_remove(language, "_\\d")) |> 
  distinct(language, form) |> 
  mutate(value = 1) |> 
  # 0/1 table: one row per form, one column per language
  pivot_wider(names_from = language, values_from = value, values_fill = 0) |> 
  column_to_rownames("form") |>
  # `number` is still a column at this point (presumably carried along as the
  # grouping variable); remove it so only the language columns are transposed
  select(-number) |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  # magrittr pipe so that `.` (the tree) can be referenced inside tip.color
  as.phylo()  %>%
  plot(main = "Complex verbs semantics with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# Draw one random row of `complex_verbs` per stimulus number, break its glosses
# into component forms, and plot a dendrogram of the languages based on which
# forms they share. No seed is set inside this chunk, so slice_sample() may
# pick different rows on each run (unless a seed is fixed elsewhere).
complex_verbs |> 
  filter(number %in% to_keep) |> 
  group_by(number) |> 
  slice_sample(n = 1) |> 
  # columns Guro..Kono become (language, verb_meaning) pairs
  pivot_longer(names_to = "language", values_to = "verb_meaning", Guro:Kono) |> 
  filter(!is.na(verb_meaning)) |> 
  select(number, id, language, verb_meaning) |> 
  # drop parenthesised text, normalise whitespace, split alternatives on " ; "
  mutate(verb_meaning = str_remove_all(verb_meaning, "\\(.*?\\)"),
         verb_meaning = str_squish(verb_meaning),
         verb_meaning = str_split(verb_meaning, " ; ")) |>
  unnest_longer(verb_meaning) |> 
  # split each gloss on spaces into p1..p4; glosses with fewer pieces get NA
  # in the trailing columns
  separate_wider_delim(verb_meaning, delim = " ",
                       names = c("p1", "p2", "p3", "p4"),
                       too_few = "align_start") |> 
  # keep only glosses that consist of at least two pieces
  filter(!is.na(p2)) |> 
  # n_parts = number of non-missing pieces (2, 3 or 4)
  mutate(n_parts = case_when(!is.na(p4) ~ 4,
                             is.na(p4) & !is.na(p3) ~ 3,
                             is.na(p3) & !is.na(p2) ~ 2)) |> 
  pivot_longer(names_to = "p", values_to = "form", p1:p4) |> 
  # pieces containing "?" are set to NA and removed by na.omit() together with
  # every other row that has an NA in any column
  mutate(form = case_when(str_detect(form, "\\?{1,3}") ~ NA,
                          TRUE ~ form)) |> 
  na.omit() |> 
  arrange(form, number, id) |> 
  # NOTE(review): the "_<n_parts>" suffix added here is stripped again by
  # str_remove(language, "_\\d") a few steps below
  mutate(language = str_c(language, "_", n_parts)) |> 
  select(number, id, language, form) |> 
  arrange(number, id, language, form) |> 
  mutate(form = str_c(form, "_", number),
         language = str_remove(language, "_\\d")) |> 
  distinct(language, form) |> 
  mutate(value = 1) |> 
  # 0/1 table: one row per form, one column per language
  pivot_wider(names_from = language, values_from = value, values_fill = 0) |> 
  column_to_rownames("form") |>
  # `number` is still a column at this point (presumably carried along as the
  # grouping variable); remove it so only the language columns are transposed
  select(-number) |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  # magrittr pipe so that `.` (the tree) can be referenced inside tip.color
  as.phylo()  %>%
  plot(main = "Complex verbs semantics with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# Draw one random row of `complex_verbs` per stimulus number, break its glosses
# into component forms, and plot a dendrogram of the languages based on which
# forms they share. No seed is set inside this chunk, so slice_sample() may
# pick different rows on each run (unless a seed is fixed elsewhere).
complex_verbs |> 
  filter(number %in% to_keep) |> 
  group_by(number) |> 
  slice_sample(n = 1) |> 
  # columns Guro..Kono become (language, verb_meaning) pairs
  pivot_longer(names_to = "language", values_to = "verb_meaning", Guro:Kono) |> 
  filter(!is.na(verb_meaning)) |> 
  select(number, id, language, verb_meaning) |> 
  # drop parenthesised text, normalise whitespace, split alternatives on " ; "
  mutate(verb_meaning = str_remove_all(verb_meaning, "\\(.*?\\)"),
         verb_meaning = str_squish(verb_meaning),
         verb_meaning = str_split(verb_meaning, " ; ")) |>
  unnest_longer(verb_meaning) |> 
  # split each gloss on spaces into p1..p4; glosses with fewer pieces get NA
  # in the trailing columns
  separate_wider_delim(verb_meaning, delim = " ",
                       names = c("p1", "p2", "p3", "p4"),
                       too_few = "align_start") |> 
  # keep only glosses that consist of at least two pieces
  filter(!is.na(p2)) |> 
  # n_parts = number of non-missing pieces (2, 3 or 4)
  mutate(n_parts = case_when(!is.na(p4) ~ 4,
                             is.na(p4) & !is.na(p3) ~ 3,
                             is.na(p3) & !is.na(p2) ~ 2)) |> 
  pivot_longer(names_to = "p", values_to = "form", p1:p4) |> 
  # pieces containing "?" are set to NA and removed by na.omit() together with
  # every other row that has an NA in any column
  mutate(form = case_when(str_detect(form, "\\?{1,3}") ~ NA,
                          TRUE ~ form)) |> 
  na.omit() |> 
  arrange(form, number, id) |> 
  # NOTE(review): the "_<n_parts>" suffix added here is stripped again by
  # str_remove(language, "_\\d") a few steps below
  mutate(language = str_c(language, "_", n_parts)) |> 
  select(number, id, language, form) |> 
  arrange(number, id, language, form) |> 
  mutate(form = str_c(form, "_", number),
         language = str_remove(language, "_\\d")) |> 
  distinct(language, form) |> 
  mutate(value = 1) |> 
  # 0/1 table: one row per form, one column per language
  pivot_wider(names_from = language, values_from = value, values_fill = 0) |> 
  column_to_rownames("form") |>
  # `number` is still a column at this point (presumably carried along as the
  # grouping variable); remove it so only the language columns are transposed
  select(-number) |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  # magrittr pipe so that `.` (the tree) can be referenced inside tip.color
  as.phylo()  %>%
  plot(main = "Complex verbs semantics with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# Draw one random row of `complex_verbs` per stimulus number, break its glosses
# into component forms, and plot a dendrogram of the languages based on which
# forms they share. No seed is set inside this chunk, so slice_sample() may
# pick different rows on each run (unless a seed is fixed elsewhere).
complex_verbs |> 
  filter(number %in% to_keep) |> 
  group_by(number) |> 
  slice_sample(n = 1) |> 
  # columns Guro..Kono become (language, verb_meaning) pairs
  pivot_longer(names_to = "language", values_to = "verb_meaning", Guro:Kono) |> 
  filter(!is.na(verb_meaning)) |> 
  select(number, id, language, verb_meaning) |> 
  # drop parenthesised text, normalise whitespace, split alternatives on " ; "
  mutate(verb_meaning = str_remove_all(verb_meaning, "\\(.*?\\)"),
         verb_meaning = str_squish(verb_meaning),
         verb_meaning = str_split(verb_meaning, " ; ")) |>
  unnest_longer(verb_meaning) |> 
  # split each gloss on spaces into p1..p4; glosses with fewer pieces get NA
  # in the trailing columns
  separate_wider_delim(verb_meaning, delim = " ",
                       names = c("p1", "p2", "p3", "p4"),
                       too_few = "align_start") |> 
  # keep only glosses that consist of at least two pieces
  filter(!is.na(p2)) |> 
  # n_parts = number of non-missing pieces (2, 3 or 4)
  mutate(n_parts = case_when(!is.na(p4) ~ 4,
                             is.na(p4) & !is.na(p3) ~ 3,
                             is.na(p3) & !is.na(p2) ~ 2)) |> 
  pivot_longer(names_to = "p", values_to = "form", p1:p4) |> 
  # pieces containing "?" are set to NA and removed by na.omit() together with
  # every other row that has an NA in any column
  mutate(form = case_when(str_detect(form, "\\?{1,3}") ~ NA,
                          TRUE ~ form)) |> 
  na.omit() |> 
  arrange(form, number, id) |> 
  # NOTE(review): the "_<n_parts>" suffix added here is stripped again by
  # str_remove(language, "_\\d") a few steps below
  mutate(language = str_c(language, "_", n_parts)) |> 
  select(number, id, language, form) |> 
  arrange(number, id, language, form) |> 
  mutate(form = str_c(form, "_", number),
         language = str_remove(language, "_\\d")) |> 
  distinct(language, form) |> 
  mutate(value = 1) |> 
  # 0/1 table: one row per form, one column per language
  pivot_wider(names_from = language, values_from = value, values_fill = 0) |> 
  column_to_rownames("form") |>
  # `number` is still a column at this point (presumably carried along as the
  # grouping variable); remove it so only the language columns are transposed
  select(-number) |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  # magrittr pipe so that `.` (the tree) can be referenced inside tip.color
  as.phylo()  %>%
  plot(main = "Complex verbs semantics with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# Draw one random row of `complex_verbs` per stimulus number, break its glosses
# into component forms, and plot a dendrogram of the languages based on which
# forms they share. No seed is set inside this chunk, so slice_sample() may
# pick different rows on each run (unless a seed is fixed elsewhere).
complex_verbs |> 
  filter(number %in% to_keep) |> 
  group_by(number) |> 
  slice_sample(n = 1) |> 
  # columns Guro..Kono become (language, verb_meaning) pairs
  pivot_longer(names_to = "language", values_to = "verb_meaning", Guro:Kono) |> 
  filter(!is.na(verb_meaning)) |> 
  select(number, id, language, verb_meaning) |> 
  # drop parenthesised text, normalise whitespace, split alternatives on " ; "
  mutate(verb_meaning = str_remove_all(verb_meaning, "\\(.*?\\)"),
         verb_meaning = str_squish(verb_meaning),
         verb_meaning = str_split(verb_meaning, " ; ")) |>
  unnest_longer(verb_meaning) |> 
  # split each gloss on spaces into p1..p4; glosses with fewer pieces get NA
  # in the trailing columns
  separate_wider_delim(verb_meaning, delim = " ",
                       names = c("p1", "p2", "p3", "p4"),
                       too_few = "align_start") |> 
  # keep only glosses that consist of at least two pieces
  filter(!is.na(p2)) |> 
  # n_parts = number of non-missing pieces (2, 3 or 4)
  mutate(n_parts = case_when(!is.na(p4) ~ 4,
                             is.na(p4) & !is.na(p3) ~ 3,
                             is.na(p3) & !is.na(p2) ~ 2)) |> 
  pivot_longer(names_to = "p", values_to = "form", p1:p4) |> 
  # pieces containing "?" are set to NA and removed by na.omit() together with
  # every other row that has an NA in any column
  mutate(form = case_when(str_detect(form, "\\?{1,3}") ~ NA,
                          TRUE ~ form)) |> 
  na.omit() |> 
  arrange(form, number, id) |> 
  # NOTE(review): the "_<n_parts>" suffix added here is stripped again by
  # str_remove(language, "_\\d") a few steps below
  mutate(language = str_c(language, "_", n_parts)) |> 
  select(number, id, language, form) |> 
  arrange(number, id, language, form) |> 
  mutate(form = str_c(form, "_", number),
         language = str_remove(language, "_\\d")) |> 
  distinct(language, form) |> 
  mutate(value = 1) |> 
  # 0/1 table: one row per form, one column per language
  pivot_wider(names_from = language, values_from = value, values_fill = 0) |> 
  column_to_rownames("form") |>
  # `number` is still a column at this point (presumably carried along as the
  # grouping variable); remove it so only the language columns are transposed
  select(-number) |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  # magrittr pipe so that `.` (the tree) can be referenced inside tip.color
  as.phylo()  %>%
  plot(main = "Complex verbs semantics with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# Draw one random row of `complex_verbs` per stimulus number, break its glosses
# into component forms, and plot a dendrogram of the languages based on which
# forms they share. No seed is set inside this chunk, so slice_sample() may
# pick different rows on each run (unless a seed is fixed elsewhere).
complex_verbs |> 
  filter(number %in% to_keep) |> 
  group_by(number) |> 
  slice_sample(n = 1) |> 
  # columns Guro..Kono become (language, verb_meaning) pairs
  pivot_longer(names_to = "language", values_to = "verb_meaning", Guro:Kono) |> 
  filter(!is.na(verb_meaning)) |> 
  select(number, id, language, verb_meaning) |> 
  # drop parenthesised text, normalise whitespace, split alternatives on " ; "
  mutate(verb_meaning = str_remove_all(verb_meaning, "\\(.*?\\)"),
         verb_meaning = str_squish(verb_meaning),
         verb_meaning = str_split(verb_meaning, " ; ")) |>
  unnest_longer(verb_meaning) |> 
  # split each gloss on spaces into p1..p4; glosses with fewer pieces get NA
  # in the trailing columns
  separate_wider_delim(verb_meaning, delim = " ",
                       names = c("p1", "p2", "p3", "p4"),
                       too_few = "align_start") |> 
  # keep only glosses that consist of at least two pieces
  filter(!is.na(p2)) |> 
  # n_parts = number of non-missing pieces (2, 3 or 4)
  mutate(n_parts = case_when(!is.na(p4) ~ 4,
                             is.na(p4) & !is.na(p3) ~ 3,
                             is.na(p3) & !is.na(p2) ~ 2)) |> 
  pivot_longer(names_to = "p", values_to = "form", p1:p4) |> 
  # pieces containing "?" are set to NA and removed by na.omit() together with
  # every other row that has an NA in any column
  mutate(form = case_when(str_detect(form, "\\?{1,3}") ~ NA,
                          TRUE ~ form)) |> 
  na.omit() |> 
  arrange(form, number, id) |> 
  # NOTE(review): the "_<n_parts>" suffix added here is stripped again by
  # str_remove(language, "_\\d") a few steps below
  mutate(language = str_c(language, "_", n_parts)) |> 
  select(number, id, language, form) |> 
  arrange(number, id, language, form) |> 
  mutate(form = str_c(form, "_", number),
         language = str_remove(language, "_\\d")) |> 
  distinct(language, form) |> 
  mutate(value = 1) |> 
  # 0/1 table: one row per form, one column per language
  pivot_wider(names_from = language, values_from = value, values_fill = 0) |> 
  column_to_rownames("form") |>
  # `number` is still a column at this point (presumably carried along as the
  # grouping variable); remove it so only the language columns are transposed
  select(-number) |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  # magrittr pipe so that `.` (the tree) can be referenced inside tip.color
  as.phylo()  %>%
  plot(main = "Complex verbs semantics with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# Draw one random row of `complex_verbs` per stimulus number, break its glosses
# into component forms, and plot a dendrogram of the languages based on which
# forms they share. No seed is set inside this chunk, so slice_sample() may
# pick different rows on each run (unless a seed is fixed elsewhere).
complex_verbs |> 
  filter(number %in% to_keep) |> 
  group_by(number) |> 
  slice_sample(n = 1) |> 
  # columns Guro..Kono become (language, verb_meaning) pairs
  pivot_longer(names_to = "language", values_to = "verb_meaning", Guro:Kono) |> 
  filter(!is.na(verb_meaning)) |> 
  select(number, id, language, verb_meaning) |> 
  # drop parenthesised text, normalise whitespace, split alternatives on " ; "
  mutate(verb_meaning = str_remove_all(verb_meaning, "\\(.*?\\)"),
         verb_meaning = str_squish(verb_meaning),
         verb_meaning = str_split(verb_meaning, " ; ")) |>
  unnest_longer(verb_meaning) |> 
  # split each gloss on spaces into p1..p4; glosses with fewer pieces get NA
  # in the trailing columns
  separate_wider_delim(verb_meaning, delim = " ",
                       names = c("p1", "p2", "p3", "p4"),
                       too_few = "align_start") |> 
  # keep only glosses that consist of at least two pieces
  filter(!is.na(p2)) |> 
  # n_parts = number of non-missing pieces (2, 3 or 4)
  mutate(n_parts = case_when(!is.na(p4) ~ 4,
                             is.na(p4) & !is.na(p3) ~ 3,
                             is.na(p3) & !is.na(p2) ~ 2)) |> 
  pivot_longer(names_to = "p", values_to = "form", p1:p4) |> 
  # pieces containing "?" are set to NA and removed by na.omit() together with
  # every other row that has an NA in any column
  mutate(form = case_when(str_detect(form, "\\?{1,3}") ~ NA,
                          TRUE ~ form)) |> 
  na.omit() |> 
  arrange(form, number, id) |> 
  # NOTE(review): the "_<n_parts>" suffix added here is stripped again by
  # str_remove(language, "_\\d") a few steps below
  mutate(language = str_c(language, "_", n_parts)) |> 
  select(number, id, language, form) |> 
  arrange(number, id, language, form) |> 
  mutate(form = str_c(form, "_", number),
         language = str_remove(language, "_\\d")) |> 
  distinct(language, form) |> 
  mutate(value = 1) |> 
  # 0/1 table: one row per form, one column per language
  pivot_wider(names_from = language, values_from = value, values_fill = 0) |> 
  column_to_rownames("form") |>
  # `number` is still a column at this point (presumably carried along as the
  # grouping variable); remove it so only the language columns are transposed
  select(-number) |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  # magrittr pipe so that `.` (the tree) can be referenced inside tip.color
  as.phylo()  %>%
  plot(main = "Complex verbs semantics with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# Draw one random row of `complex_verbs` per stimulus number, break its glosses
# into component forms, and plot a dendrogram of the languages based on which
# forms they share. No seed is set inside this chunk, so slice_sample() may
# pick different rows on each run (unless a seed is fixed elsewhere).
complex_verbs |> 
  filter(number %in% to_keep) |> 
  group_by(number) |> 
  slice_sample(n = 1) |> 
  # columns Guro..Kono become (language, verb_meaning) pairs
  pivot_longer(names_to = "language", values_to = "verb_meaning", Guro:Kono) |> 
  filter(!is.na(verb_meaning)) |> 
  select(number, id, language, verb_meaning) |> 
  # drop parenthesised text, normalise whitespace, split alternatives on " ; "
  mutate(verb_meaning = str_remove_all(verb_meaning, "\\(.*?\\)"),
         verb_meaning = str_squish(verb_meaning),
         verb_meaning = str_split(verb_meaning, " ; ")) |>
  unnest_longer(verb_meaning) |> 
  # split each gloss on spaces into p1..p4; glosses with fewer pieces get NA
  # in the trailing columns
  separate_wider_delim(verb_meaning, delim = " ",
                       names = c("p1", "p2", "p3", "p4"),
                       too_few = "align_start") |> 
  # keep only glosses that consist of at least two pieces
  filter(!is.na(p2)) |> 
  # n_parts = number of non-missing pieces (2, 3 or 4)
  mutate(n_parts = case_when(!is.na(p4) ~ 4,
                             is.na(p4) & !is.na(p3) ~ 3,
                             is.na(p3) & !is.na(p2) ~ 2)) |> 
  pivot_longer(names_to = "p", values_to = "form", p1:p4) |> 
  # pieces containing "?" are set to NA and removed by na.omit() together with
  # every other row that has an NA in any column
  mutate(form = case_when(str_detect(form, "\\?{1,3}") ~ NA,
                          TRUE ~ form)) |> 
  na.omit() |> 
  arrange(form, number, id) |> 
  # NOTE(review): the "_<n_parts>" suffix added here is stripped again by
  # str_remove(language, "_\\d") a few steps below
  mutate(language = str_c(language, "_", n_parts)) |> 
  select(number, id, language, form) |> 
  arrange(number, id, language, form) |> 
  mutate(form = str_c(form, "_", number),
         language = str_remove(language, "_\\d")) |> 
  distinct(language, form) |> 
  mutate(value = 1) |> 
  # 0/1 table: one row per form, one column per language
  pivot_wider(names_from = language, values_from = value, values_fill = 0) |> 
  column_to_rownames("form") |>
  # `number` is still a column at this point (presumably carried along as the
  # grouping variable); remove it so only the language columns are transposed
  select(-number) |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  # magrittr pipe so that `.` (the tree) can be referenced inside tip.color
  as.phylo()  %>%
  plot(main = "Complex verbs semantics with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# Draw one random row of `complex_verbs` per stimulus number, break its glosses
# into component forms, and plot a dendrogram of the languages based on which
# forms they share. No seed is set inside this chunk, so slice_sample() may
# pick different rows on each run (unless a seed is fixed elsewhere).
complex_verbs |> 
  filter(number %in% to_keep) |> 
  group_by(number) |> 
  slice_sample(n = 1) |> 
  # columns Guro..Kono become (language, verb_meaning) pairs
  pivot_longer(names_to = "language", values_to = "verb_meaning", Guro:Kono) |> 
  filter(!is.na(verb_meaning)) |> 
  select(number, id, language, verb_meaning) |> 
  # drop parenthesised text, normalise whitespace, split alternatives on " ; "
  mutate(verb_meaning = str_remove_all(verb_meaning, "\\(.*?\\)"),
         verb_meaning = str_squish(verb_meaning),
         verb_meaning = str_split(verb_meaning, " ; ")) |>
  unnest_longer(verb_meaning) |> 
  # split each gloss on spaces into p1..p4; glosses with fewer pieces get NA
  # in the trailing columns
  separate_wider_delim(verb_meaning, delim = " ",
                       names = c("p1", "p2", "p3", "p4"),
                       too_few = "align_start") |> 
  # keep only glosses that consist of at least two pieces
  filter(!is.na(p2)) |> 
  # n_parts = number of non-missing pieces (2, 3 or 4)
  mutate(n_parts = case_when(!is.na(p4) ~ 4,
                             is.na(p4) & !is.na(p3) ~ 3,
                             is.na(p3) & !is.na(p2) ~ 2)) |> 
  pivot_longer(names_to = "p", values_to = "form", p1:p4) |> 
  # pieces containing "?" are set to NA and removed by na.omit() together with
  # every other row that has an NA in any column
  mutate(form = case_when(str_detect(form, "\\?{1,3}") ~ NA,
                          TRUE ~ form)) |> 
  na.omit() |> 
  arrange(form, number, id) |> 
  # NOTE(review): the "_<n_parts>" suffix added here is stripped again by
  # str_remove(language, "_\\d") a few steps below
  mutate(language = str_c(language, "_", n_parts)) |> 
  select(number, id, language, form) |> 
  arrange(number, id, language, form) |> 
  mutate(form = str_c(form, "_", number),
         language = str_remove(language, "_\\d")) |> 
  distinct(language, form) |> 
  mutate(value = 1) |> 
  # 0/1 table: one row per form, one column per language
  pivot_wider(names_from = language, values_from = value, values_fill = 0) |> 
  column_to_rownames("form") |>
  # `number` is still a column at this point (presumably carried along as the
  # grouping variable); remove it so only the language columns are transposed
  select(-number) |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  # magrittr pipe so that `.` (the tree) can be referenced inside tip.color
  as.phylo()  %>%
  plot(main = "Complex verbs semantics with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# Draw one random row of `complex_verbs` per stimulus number, break its glosses
# into component forms, and plot a dendrogram of the languages based on which
# forms they share. No seed is set inside this chunk, so slice_sample() may
# pick different rows on each run (unless a seed is fixed elsewhere).
complex_verbs |> 
  filter(number %in% to_keep) |> 
  group_by(number) |> 
  slice_sample(n = 1) |> 
  # columns Guro..Kono become (language, verb_meaning) pairs
  pivot_longer(names_to = "language", values_to = "verb_meaning", Guro:Kono) |> 
  filter(!is.na(verb_meaning)) |> 
  select(number, id, language, verb_meaning) |> 
  # drop parenthesised text, normalise whitespace, split alternatives on " ; "
  mutate(verb_meaning = str_remove_all(verb_meaning, "\\(.*?\\)"),
         verb_meaning = str_squish(verb_meaning),
         verb_meaning = str_split(verb_meaning, " ; ")) |>
  unnest_longer(verb_meaning) |> 
  # split each gloss on spaces into p1..p4; glosses with fewer pieces get NA
  # in the trailing columns
  separate_wider_delim(verb_meaning, delim = " ",
                       names = c("p1", "p2", "p3", "p4"),
                       too_few = "align_start") |> 
  # keep only glosses that consist of at least two pieces
  filter(!is.na(p2)) |> 
  # n_parts = number of non-missing pieces (2, 3 or 4)
  mutate(n_parts = case_when(!is.na(p4) ~ 4,
                             is.na(p4) & !is.na(p3) ~ 3,
                             is.na(p3) & !is.na(p2) ~ 2)) |> 
  pivot_longer(names_to = "p", values_to = "form", p1:p4) |> 
  # pieces containing "?" are set to NA and removed by na.omit() together with
  # every other row that has an NA in any column
  mutate(form = case_when(str_detect(form, "\\?{1,3}") ~ NA,
                          TRUE ~ form)) |> 
  na.omit() |> 
  arrange(form, number, id) |> 
  # NOTE(review): the "_<n_parts>" suffix added here is stripped again by
  # str_remove(language, "_\\d") a few steps below
  mutate(language = str_c(language, "_", n_parts)) |> 
  select(number, id, language, form) |> 
  arrange(number, id, language, form) |> 
  mutate(form = str_c(form, "_", number),
         language = str_remove(language, "_\\d")) |> 
  distinct(language, form) |> 
  mutate(value = 1) |> 
  # 0/1 table: one row per form, one column per language
  pivot_wider(names_from = language, values_from = value, values_fill = 0) |> 
  column_to_rownames("form") |>
  # `number` is still a column at this point (presumably carried along as the
  # grouping variable); remove it so only the language columns are transposed
  select(-number) |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  # magrittr pipe so that `.` (the tree) can be referenced inside tip.color
  as.phylo()  %>%
  plot(main = "Complex verbs semantics with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# Draw one random row of `complex_verbs` per stimulus number, break its glosses
# into component forms, and plot a dendrogram of the languages based on which
# forms they share. No seed is set inside this chunk, so slice_sample() may
# pick different rows on each run (unless a seed is fixed elsewhere).
complex_verbs |> 
  filter(number %in% to_keep) |> 
  group_by(number) |> 
  slice_sample(n = 1) |> 
  # columns Guro..Kono become (language, verb_meaning) pairs
  pivot_longer(names_to = "language", values_to = "verb_meaning", Guro:Kono) |> 
  filter(!is.na(verb_meaning)) |> 
  select(number, id, language, verb_meaning) |> 
  # drop parenthesised text, normalise whitespace, split alternatives on " ; "
  mutate(verb_meaning = str_remove_all(verb_meaning, "\\(.*?\\)"),
         verb_meaning = str_squish(verb_meaning),
         verb_meaning = str_split(verb_meaning, " ; ")) |>
  unnest_longer(verb_meaning) |> 
  # split each gloss on spaces into p1..p4; glosses with fewer pieces get NA
  # in the trailing columns
  separate_wider_delim(verb_meaning, delim = " ",
                       names = c("p1", "p2", "p3", "p4"),
                       too_few = "align_start") |> 
  # keep only glosses that consist of at least two pieces
  filter(!is.na(p2)) |> 
  # n_parts = number of non-missing pieces (2, 3 or 4)
  mutate(n_parts = case_when(!is.na(p4) ~ 4,
                             is.na(p4) & !is.na(p3) ~ 3,
                             is.na(p3) & !is.na(p2) ~ 2)) |> 
  pivot_longer(names_to = "p", values_to = "form", p1:p4) |> 
  # pieces containing "?" are set to NA and removed by na.omit() together with
  # every other row that has an NA in any column
  mutate(form = case_when(str_detect(form, "\\?{1,3}") ~ NA,
                          TRUE ~ form)) |> 
  na.omit() |> 
  arrange(form, number, id) |> 
  # NOTE(review): the "_<n_parts>" suffix added here is stripped again by
  # str_remove(language, "_\\d") a few steps below
  mutate(language = str_c(language, "_", n_parts)) |> 
  select(number, id, language, form) |> 
  arrange(number, id, language, form) |> 
  mutate(form = str_c(form, "_", number),
         language = str_remove(language, "_\\d")) |> 
  distinct(language, form) |> 
  mutate(value = 1) |> 
  # 0/1 table: one row per form, one column per language
  pivot_wider(names_from = language, values_from = value, values_fill = 0) |> 
  column_to_rownames("form") |>
  # `number` is still a column at this point (presumably carried along as the
  # grouping variable); remove it so only the language columns are transposed
  select(-number) |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  # magrittr pipe so that `.` (the tree) can be referenced inside tip.color
  as.phylo()  %>%
  plot(main = "Complex verbs semantics with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# Draw one random row of `complex_verbs` per stimulus number, break its glosses
# into component forms, and plot a dendrogram of the languages based on which
# forms they share. No seed is set inside this chunk, so slice_sample() may
# pick different rows on each run (unless a seed is fixed elsewhere).
complex_verbs |> 
  filter(number %in% to_keep) |> 
  group_by(number) |> 
  slice_sample(n = 1) |> 
  # columns Guro..Kono become (language, verb_meaning) pairs
  pivot_longer(names_to = "language", values_to = "verb_meaning", Guro:Kono) |> 
  filter(!is.na(verb_meaning)) |> 
  select(number, id, language, verb_meaning) |> 
  # drop parenthesised text, normalise whitespace, split alternatives on " ; "
  mutate(verb_meaning = str_remove_all(verb_meaning, "\\(.*?\\)"),
         verb_meaning = str_squish(verb_meaning),
         verb_meaning = str_split(verb_meaning, " ; ")) |>
  unnest_longer(verb_meaning) |> 
  # split each gloss on spaces into p1..p4; glosses with fewer pieces get NA
  # in the trailing columns
  separate_wider_delim(verb_meaning, delim = " ",
                       names = c("p1", "p2", "p3", "p4"),
                       too_few = "align_start") |> 
  # keep only glosses that consist of at least two pieces
  filter(!is.na(p2)) |> 
  # n_parts = number of non-missing pieces (2, 3 or 4)
  mutate(n_parts = case_when(!is.na(p4) ~ 4,
                             is.na(p4) & !is.na(p3) ~ 3,
                             is.na(p3) & !is.na(p2) ~ 2)) |> 
  pivot_longer(names_to = "p", values_to = "form", p1:p4) |> 
  # pieces containing "?" are set to NA and removed by na.omit() together with
  # every other row that has an NA in any column
  mutate(form = case_when(str_detect(form, "\\?{1,3}") ~ NA,
                          TRUE ~ form)) |> 
  na.omit() |> 
  arrange(form, number, id) |> 
  # NOTE(review): the "_<n_parts>" suffix added here is stripped again by
  # str_remove(language, "_\\d") a few steps below
  mutate(language = str_c(language, "_", n_parts)) |> 
  select(number, id, language, form) |> 
  arrange(number, id, language, form) |> 
  mutate(form = str_c(form, "_", number),
         language = str_remove(language, "_\\d")) |> 
  distinct(language, form) |> 
  mutate(value = 1) |> 
  # 0/1 table: one row per form, one column per language
  pivot_wider(names_from = language, values_from = value, values_fill = 0) |> 
  column_to_rownames("form") |>
  # `number` is still a column at this point (presumably carried along as the
  # grouping variable); remove it so only the language columns are transposed
  select(-number) |> 
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  # magrittr pipe so that `.` (the tree) can be referenced inside tip.color
  as.phylo()  %>%
  plot(main = "Complex verbs semantics with random equivalents and common stimuli",
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
# Repeat of the complex-verb dendrogram: same steps, with a fresh random draw
# of one row per stimulus.
complex_verbs |> 
  # Restrict to the stimuli listed in `to_keep`, then draw one row per stimulus
  # number at random (no seed is set in this chunk, so the draw may differ
  # between runs).
  filter(number %in% to_keep) |> 
  group_by(number) |> 
  slice_sample(n = 1) |> 
  # Long format: one row per stimulus and language column (Guro through Kono).
  pivot_longer(names_to = "language", values_to = "verb_meaning", Guro:Kono) |> 
  filter(!is.na(verb_meaning)) |> 
  select(number, id, language, verb_meaning) |> 
  # Drop parenthesised text, normalise whitespace, and split alternatives
  # separated by " ; " into separate rows.
  mutate(verb_meaning = str_remove_all(verb_meaning, "\\(.*?\\)"),
         verb_meaning = str_squish(verb_meaning),
         verb_meaning = str_split(verb_meaning, " ; ")) |>
  unnest_longer(verb_meaning) |> 
  # Split each meaning on spaces into up to four parts; shorter meanings are
  # padded with NA on the right.
  separate_wider_delim(verb_meaning, delim = " ",
                       names = c("p1", "p2", "p3", "p4"),
                       too_few = "align_start") |> 
  # Keep only meanings with at least two parts.
  filter(!is.na(p2)) |> 
  # Number of non-missing parts (2, 3 or 4).
  mutate(n_parts = case_when(!is.na(p4) ~ 4,
                             is.na(p4) & !is.na(p3) ~ 3,
                             is.na(p3) & !is.na(p2) ~ 2)) |> 
  # One row per part; parts containing a question mark are set to NA and then
  # removed. Note that na.omit() drops rows with an NA in ANY column.
  pivot_longer(names_to = "p", values_to = "form", p1:p4) |> 
  mutate(form = case_when(str_detect(form, "\\?{1,3}") ~ NA,
                          TRUE ~ form)) |> 
  na.omit() |> 
  arrange(form, number, id) |> 
  # The "_<n_parts>" suffix added here is stripped again a few steps below.
  mutate(language = str_c(language, "_", n_parts)) |> 
  select(number, id, language, form) |> 
  arrange(number, id, language, form) |> 
  # Make each form specific to its stimulus by appending the stimulus number.
  mutate(form = str_c(form, "_", number),
         language = str_remove(language, "_\\d")) |> 
  # Presence/absence table: one row per form, one column per language.
  distinct(language, form) |> 
  mutate(value = 1) |> 
  pivot_wider(names_from = language, values_from = value, values_fill = 0) |> 
  column_to_rownames("form") |>
  # NOTE(review): `number` is not named in distinct() above; it presumably
  # survives because the data are still grouped by `number` at that point.
  # Confirm before reordering or ungrouping earlier steps.
  select(-number) |> 
  # Transpose so that languages are rows, then cluster on binary distance.
  t() |> 
  dist(method = "binary") |> 
  hclust() |> 
  # magrittr pipe is needed here because the plot call refers to `.`.
  as.phylo()  %>%
  plot(main = "Complex verbs semantics with random equivalents and common stimuli",
       # Tip colours are looked up in the `colors` table by language name.
       tip.color = colors$color[match(.$tip.label, colors$language)],
       direction = "downwards",
       cex = 1.5, 
       font = 2)

Code
library(lingtypology)
# Language coordinates for the map; the display name of each language is
# derived from its glottocode.
for_map <- mutate(readxl::read_xlsx("../for_map.xlsx"),
                  language_name = lang.gltc(glottocode))

# Two stacked layers: wide black markers underneath, and on top of them
# markers filled with each language's own colour, with always-visible labels.
# NOTE(review): labels come from `for_map$language`, presumably a column of the
# spreadsheet (only `language_name` is created here) - confirm.
map.feature(
  languages = for_map$language_name,
  latitude = for_map$latitude,
  longitude = for_map$longitude,
  features = for_map$color,
  color = for_map$color,
  legend = FALSE,
  tile = "Esri.WorldGrayCanvas",
  label = for_map$language,
  label.position = "bottom",
  label.hide = FALSE,
  minimap = TRUE,
  pipe.data = map.feature(
    languages = for_map$language_name,
    latitude = for_map$latitude,
    longitude = for_map$longitude,
    color = "black",
    width = 6
  )
)
Code
library(tidyverse)
# Mano and Kpelle are stored side by side in one sheet. Each language is
# extracted into the shared long layout:
# number, construction, construction_type, adposition, verb, verb_meaning, language.
mano_kpelle_pre <- readxl::read_xlsx("../say_stimuli.xlsx", sheet = "mano + kpelle")

# Mano: pick and rename the Mano columns, drop rows without a construction.
mano <- mano_kpelle_pre |>
  select(number = Number,
         construction = `Mano construction`,
         construction_type = `Mano extended construction type`,
         adposition = `Mano Postposition`,
         verb = `Mano verb`,
         verb_meaning = `Mano Complex verb meaning`) |>
  filter(!is.na(construction)) |>
  mutate(language = "Mano")

# Kpelle: same layout, taken from the Kpelle columns of the same sheet.
kpelle <- mano_kpelle_pre |>
  select(number = Number,
         construction = `Kpelle construction`,
         construction_type = `Kpelle extended construction type`,
         adposition = `Kpelle postposition`,
         verb = `Kpelle verb`,
         verb_meaning = `Kpelle complex verb meaning`) |>
  filter(!is.na(construction)) |>
  mutate(language = "Kpelle")

# The raw sheet is no longer needed.
rm(mano_kpelle_pre)

# Kono data in the shared long layout.
kono_pre <- readxl::read_xlsx("../say_stimuli.xlsx", sheet = "kono")

# NOTE(review): the "...8" suffix in `Kono construction...8` looks like a
# position-based name repair of a duplicated header - confirm it still matches
# if columns are added to or removed from the sheet.
kono <- kono_pre |>
  select(number = Number,
         construction = `Kono construction...8`,
         construction_type = `Kono extended construction`,
         adposition = `Kono adposition`,
         verb = `Kono verb`,
         verb_meaning = `Kono complex verb`) |>
  filter(!is.na(construction)) |>
  mutate(language = "Kono")

# The raw sheet is no longer needed.
rm(kono_pre)

# Looma data in the shared long layout.
# NOTE: one column of the spreadsheet was renamed by hand to
# "Looma extended construction".
looma_pre <- readxl::read_xlsx("../say_stimuli.xlsx", sheet = "Looma")

looma <- looma_pre |>
  select(number = Number,
         construction = `Looma construction`,
         construction_type = `Looma extended construction`,
         adposition = `Looma Postposition`,
         verb = `Looma Verb`,
         verb_meaning = `Looma Complex Verb`) |>
  filter(!is.na(construction)) |>
  mutate(language = "Looma")

# The raw sheet is no longer needed.
rm(looma_pre)

# Guro data in the shared long layout.
# NOTE: one column of the spreadsheet was renamed by hand to
# "Guro extended construction".
guro_pre <- readxl::read_xlsx("../say_stimuli.xlsx", sheet = "Guro")

guro <- guro_pre |>
  select(number = Number,
         construction = `Guro construction`,
         construction_type = `Guro extended construction`,
         adposition = `Guro Postposition`,
         verb = `Guro Verb`,
         verb_meaning = `Guro Complex Verb`) |>
  filter(!is.na(construction)) |>
  mutate(language = "Guro")

# The raw sheet is no longer needed.
rm(guro_pre)

# Dan Gweetaa data in the shared long layout. The language is labelled
# "Dan_Gweetaa" (with an underscore) in the data.
dan_pre <- readxl::read_xlsx("../say_stimuli.xlsx", sheet = "Dan Gweetaa from Vydrin")

dan <- dan_pre |>
  select(number = Number,
         construction = `Dan construction`,
         construction_type = `Dan extended construction type`,
         adposition = `Dan Postposition`,
         verb = `Dan Verb`,
         verb_meaning = `Dan complex verb meaning`) |>
  filter(!is.na(construction)) |>
  mutate(language = "Dan_Gweetaa")

# The raw sheet is no longer needed.
rm(dan_pre)

# Bamana data in the shared long layout. The sheet is named "Bambara Vydrin",
# while the columns and the language label use "Bamana".
# Unlike the other languages, the raw sheet object is not removed here.
bamana_pre <- readxl::read_xlsx("../say_stimuli.xlsx", sheet = "Bambara Vydrin")

bamana <- bamana_pre |>
  select(number = Number,
         construction = `Bamana construction`,
         construction_type = `Bamana extended construction type`,
         adposition = `Bamana Postposition`,
         verb = `Bamana Verb`,
         verb_meaning = `Bamana complex verb meaning`) |>
  filter(!is.na(construction)) |>
  mutate(language = "Bamana")

# Stack all languages into one long table, collapse " N N " to " N " in the
# construction types, and save the result; missing values are written as
# empty cells.
write_csv(
  mutate(bind_rows(guro, kono, kpelle, looma, mano, dan, bamana),
         construction_type = str_replace_all(construction_type, " N N ", " N ")),
  "../GM_merged.csv",
  na = ""
)

# Stimulus metadata (one row per stimulus number): English predicate, stimulus
# sentence and semantic type, renamed to short snake_case names.
stimuli <- select(
  readxl::read_xlsx("../say_stimuli.xlsx", sheet = "stimuli_general"),
  number = Number,
  predicate_eng = `English predicate`,
  stimuli = `Stimulus sentence (SAY)`,
  semantic_type = `Semantic types`
)

# Wide table of constructions: one row per stimulus and construction type,
# one column per language holding that language's construction(s) joined by
# "; ". `n` is the number of rows in the merged data with that construction
# type for the stimulus; `id` numbers the rows within each stimulus.
read_csv("../GM_merged.csv") |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type)) |>
  group_by(number) |> 
  add_count(construction_type) |> 
  # NOTE(review): this ordering appears to be discarded by the regrouping
  # below, so `id` ends up following the construction_type order rather than
  # frequency - confirm whether ranking by frequency was intended.
  arrange(number, desc(n)) |> 
  select(-adposition) |> 
  group_by(number, construction_type, language, n) |> 
  reframe(construction = str_c(construction, collapse = "; ")) |> 
  pivot_wider(names_from = language, values_from = construction, values_fill = "") |> 
  group_by(number) |> 
  mutate(id = row_number()) |>
  ungroup() |> 
  # Join key stated explicitly: `number` is the only column shared with `stimuli`.
  left_join(stimuli, by = "number") |> 
  # Each language column is listed once (the original listed `Kono` twice,
  # which had no effect on the output).
  select(number, semantic_type, predicate_eng, stimuli, id, n, construction_type,
         Guro, Looma, Mano, Dan_Gweetaa, Kono, Kpelle, Bamana) |> 
  writexl::write_xlsx("../GM_merged_wide.xlsx")

# Wide table of adpositions: one row per stimulus and construction type, one
# column per language holding the distinct adposition(s) joined by "; ".
# `id` numbers the rows within each stimulus.
read_csv("../GM_merged.csv") |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type)) |>
  select(-construction) |>
  distinct(number, construction_type, language, adposition) |> 
  group_by(number, construction_type, language) |> 
  reframe(adposition = str_c(adposition, collapse = "; ")) |> 
  pivot_wider(names_from = language, values_from = adposition, values_fill = "") |> 
  group_by(number) |> 
  mutate(id = row_number()) |>
  ungroup() |> 
  # Join key stated explicitly: `number` is the only column shared with `stimuli`.
  left_join(stimuli, by = "number") |> 
  # Each language column is listed once (the original listed `Kono` twice,
  # which had no effect on the output).
  select(number, semantic_type, predicate_eng, stimuli, id, construction_type,
         Guro, Looma, Mano, Dan_Gweetaa, Kono, Kpelle, Bamana) |> 
  writexl::write_xlsx("../GM_merged_wide_adpositions.xlsx")

# Wide table of verbs: one row per stimulus and construction type, one column
# per language holding the distinct verb(s) joined by "; ".
# `id` numbers the rows within each stimulus.
read_csv("../GM_merged.csv") |> 
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type)) |>
  select(-construction) |>
  distinct(number, construction_type, language, verb) |> 
  group_by(number, construction_type, language) |> 
  reframe(verb = str_c(verb, collapse = "; ")) |> 
  pivot_wider(names_from = language, values_from = verb, values_fill = "") |> 
  group_by(number) |> 
  mutate(id = row_number()) |>
  ungroup() |> 
  # Join key stated explicitly: `number` is the only column shared with `stimuli`.
  left_join(stimuli, by = "number") |> 
  select(number, predicate_eng, stimuli, id, construction_type,
         Guro, Looma, Mano, Dan_Gweetaa, Kpelle, Bamana, Kono) |> 
  writexl::write_xlsx("../GM_merged_wide_verb.xlsx")

# Wide table of complex-verb meanings: one row per stimulus and construction
# type, one column per language. Each cell lists the distinct meanings, each
# followed by its verb in parentheses, joined by "; ". Rows without a verb
# meaning are excluded. `id` numbers the rows within each stimulus.
read_csv("../GM_merged.csv") |>
  filter(!str_detect(construction, "no equivalent"),
         !is.na(construction_type),
         !is.na(verb_meaning)) |>
  select(-construction) |>
  distinct(number, construction_type, language, verb, verb_meaning) |> 
  # Format each entry as "<meaning> (<verb>)".
  mutate(verb_meaning = str_c(verb_meaning, " (", verb, ")")) |> 
  group_by(number, construction_type, language) |>
  reframe(verb_meaning = str_c(verb_meaning, collapse = "; ")) |>
  pivot_wider(names_from = language, values_from = verb_meaning, values_fill = "") |>
  group_by(number) |>
  mutate(id = row_number()) |>
  ungroup() |>
  # Join key stated explicitly: `number` is the only column shared with `stimuli`.
  left_join(stimuli, by = "number") |>
  select(number, predicate_eng, stimuli, id, construction_type,
         Guro, Looma, Mano, Dan_Gweetaa, Kpelle, Bamana, Kono) |>
  writexl::write_xlsx("../GM_merged_wide_verb_meaning.xlsx")